# I am using my Python 3.11 scanorama env
! python --version
Python 3.11.9
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
import matplotlib.patches as patches
import seaborn as sns
from pathlib import Path
import scanpy as sc
import anndata as an
import scanorama
import re
import math
import squidpy as sq
import scipy.sparse
from scipy.stats import zscore
from scipy.ndimage import zoom, gaussian_filter
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from shapely.geometry import Polygon, MultiPolygon, Point
from adjustText import adjust_text
import warnings
import geopandas as gpd
from scipy.stats import zscore
from scipy.signal import savgol_filter
from scipy.cluster.hierarchy import linkage, leaves_list
# Record package versions in the notebook output for reproducibility
sc.logging.print_versions()
----- anndata 0.10.7 scanpy 1.10.0 ----- PIL 10.3.0 adjustText 1.3.0 annoy NA anyio NA asciitree NA asttokens NA attr 23.1.0 attrs 23.1.0 babel 2.11.0 brotli 1.0.9 certifi 2024.02.02 cffi 1.16.0 charset_normalizer 2.0.4 cloudpickle 3.0.0 colorama 0.4.6 comm 0.2.1 cycler 0.12.1 cython_runtime NA dask 2024.5.0 dask_expr 1.1.0 dask_image 2023.08.1 datashader 0.16.1 datatree 0.0.14 dateutil 2.8.2 debugpy 1.6.7 decorator 5.1.1 defusedxml 0.7.1 docrep 0.3.2 executing 0.8.3 fastjsonschema NA fbpca NA fsspec 2023.6.0 geopandas 0.14.4 h5py 3.11.0 idna 3.7 igraph 0.11.5 imageio 2.34.1 intervaltree NA ipykernel 6.28.0 ipywidgets 8.1.2 jedi 0.18.1 jinja2 3.1.3 joblib 1.4.2 json5 NA jsonschema 4.19.2 jsonschema_specifications NA jupyter_events 0.8.0 jupyter_server 2.10.0 jupyterlab_server 2.25.1 kiwisolver 1.4.5 lazy_loader 0.4 legacy_api_wrap NA leidenalg 0.10.2 llvmlite 0.42.0 markupsafe 2.1.3 matplotlib 3.8.2 matplotlib_scalebar 0.8.1 mpl_toolkits NA msgpack 1.0.8 multipledispatch 0.6.0 multiscale_spatial_image 0.11.2 natsort 8.4.0 nbformat 5.9.2 networkx 3.3 numba 0.59.1 numcodecs 0.12.1 numpy 1.26.3 ome_zarr NA overrides NA packaging 23.2 pandas 2.2.0 param 2.1.0 parso 0.8.3 patsy 0.5.6 pkg_resources NA platformdirs 3.10.0 prometheus_client NA prompt_toolkit 3.0.43 psutil 5.9.0 pure_eval 0.2.2 pyarrow 16.0.0 pycparser 2.21 pyct 0.5.0 pydev_ipython NA pydevconsole NA pydevd 2.9.5 pydevd_file_utils NA pydevd_plugins NA pydevd_tracing NA pygeos 0.14 pygments 2.15.1 pyparsing 3.1.2 pyproj 3.6.1 pythoncom NA pythonjsonlogger NA pytz 2024.1 pywintypes NA referencing NA requests 2.31.0 rfc3339_validator 0.1.4 rfc3986_validator 0.1.1 rich NA rpds NA scanorama 1.7.4 scipy 1.13.0 seaborn 0.13.1 send2trash NA session_info 1.0.0 setuptools 69.5.1 shapely 2.0.4 six 1.16.0 skimage 0.23.2 sklearn 1.4.2 sniffio 1.3.0 socks 1.7.1 sortedcontainers 2.4.0 spatial_image 0.3.0 spatialdata 0.0.15 squidpy 1.4.1 stack_data 0.2.0 statsmodels 0.14.2 tblib 3.0.0 texttable 1.7.0 threadpoolctl 3.5.0 
tifffile 2024.5.10 tlz 0.12.1 toolz 0.12.1 tornado 6.3.3 tqdm 4.66.4 traitlets 5.7.1 typing_extensions NA urllib3 1.26.18 validators 0.28.1 wcwidth 0.2.5 websocket 0.58.0 win32api NA win32com NA win32con NA win32trace NA winerror NA xarray 2023.12.0 xarray_dataclasses 1.7.0 xarray_schema 0.0.3 xrspatial 0.4.0 yaml 6.0.1 zarr 2.18.0 zipp NA zmq 25.1.2 ----- IPython 8.20.0 jupyter_client 8.6.0 jupyter_core 5.5.0 jupyterlab 4.0.11 notebook 7.0.8 ----- Python 3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)] Windows-10-10.0.22631-SP0 ----- Session information updated at 2024-12-14 11:51
# Silence all warnings globally to keep the notebook output readable
warnings.simplefilter("ignore")
VGN1e1 = late double ridge, VRT-A2a (P1 WT)
VGN1e9 = late double ridge, VRT-A2b (P1 Pol)
VGN1b6 = lemma primordia, VRT-A2a (P1 WT)
VGN1b8 = lemma primordia, VRT-A2b (P1 Pol)
VGN1a6 = terminal spikelet, VRT-A2a (P1 WT)
VGN1a4 = terminal spikelet, VRT-A2b (P1 Pol)
VGN1c2 = carpel extension round, VRT-A2a (P1 WT)
VGN1c3 = carpel extension round, VRT-A2b (P1 Pol)
# Extract cell identifiers and cluster assignments for later use.
# .copy() makes this an independent DataFrame: without it, the column slice
# is a view of adata_spatial.obs and reset_index(inplace=True) would mutate
# a slice (triggering pandas' SettingWithCopyWarning and fragile behaviour).
cell_cluster_assignments = adata_spatial.obs[['clusters']].copy()
# Reset the index so the cell IDs become a regular column
cell_cluster_assignments.reset_index(inplace=True)
# Rename columns for clarity
cell_cluster_assignments.columns = ['cell_id', 'cluster']
# Save to a DataFrame
df_cell_cluster_assignments = pd.DataFrame(cell_cluster_assignments)
# Ensure that 'cell_id' is treated as a string column
df_cell_cluster_assignments['cell_id'] = df_cell_cluster_assignments['cell_id'].astype(str)
# Cell IDs have the form '<id>-<sample>'; split into the two parts
split_columns = df_cell_cluster_assignments['cell_id'].str.split('-', expand=True)
# First part is the bare cell id, second part is the sample suffix
df_cell_cluster_assignments['cell_id'] = split_columns[0]
df_cell_cluster_assignments['sample'] = split_columns[1]
# Index by cell id so later .isin() filters on the index work directly
df_cell_cluster_assignments.set_index('cell_id', inplace=True)
Analysing Domain and Cell Number in WT LDR + LP spikes¶
Starting with late double ridge WT spike
# Cells belonging to the inflorescence only; this group was created in the
# MERSCOPE Visualiser tool and exported as a CSV.
cellsinspike = pd.read_csv('latedoubleridge_expressionanalysis/VGN1e1_allcellsinspike.csv')
first_col = cellsinspike.columns[0]
cellsinspike = cellsinspike.rename(columns={first_col: 'cell_id'})
cellsinspike['cell_id'] = cellsinspike['cell_id'].astype(str)
cellsinspike_VGN1e1 = cellsinspike.copy()
# Restrict the cluster assignments to cells that lie inside the spike
in_spike = df_cell_cluster_assignments.index.isin(cellsinspike_VGN1e1['cell_id'])
cluster_VGN1e1 = df_cell_cluster_assignments[in_spike]
# How many distinct expression domains (clusters) does the spike contain?
unique_clusters_count = cluster_VGN1e1['cluster'].nunique()
print("Number of unique clusters in LDR inflorescence:", unique_clusters_count)
Number of unique clusters in LDR inflorescence: 10
# Total cell count per cluster; value_counts() sorts descending, so the
# cumulative sum below walks from the largest domain down to the smallest.
cluster_counts = cluster_VGN1e1['cluster'].value_counts()
cluster_counts = cluster_counts[cluster_counts > 0]  # Filter out clusters with 0 cells
# Cumulative fraction of cells covered by the top-N domains
cumulative_percentage = cluster_counts.cumsum() / cluster_counts.sum()
# enumerate(..., start=1) replaces the manual range/iloc indexing
for i, cum_frac in enumerate(cumulative_percentage, start=1):
    percentage = cum_frac * 100
    print(f"The top {i} domains account for {percentage:.2f}% of cells.")
The top 1 domains account for 34.90% of cells. The top 2 domains account for 65.77% of cells. The top 3 domains account for 81.06% of cells. The top 4 domains account for 94.81% of cells. The top 5 domains account for 98.17% of cells. The top 6 domains account for 98.75% of cells. The top 7 domains account for 99.23% of cells. The top 8 domains account for 99.71% of cells. The top 9 domains account for 99.90% of cells. The top 10 domains account for 100.00% of cells.
Moving onto lemma primordia WT spike
# Read the spike cell group for the lemma primordia sample (created in the
# MERSCOPE Visualiser tool and exported as a CSV)
cellsinspike = pd.read_csv('latedoubleridge_expressionanalysis/VGN1b6_allcellsinspike.csv')
cellsinspike = cellsinspike.rename(columns={cellsinspike.columns[0]: 'cell_id'})
cellsinspike['cell_id'] = cellsinspike['cell_id'].astype(str)
cellsinspike_VGN1b6 = cellsinspike.copy()
# Keep only the cluster assignments of cells inside the spike
cell_id_list = cellsinspike_VGN1b6['cell_id'].tolist()
cluster_VGN1b6 = df_cell_cluster_assignments[
    df_cell_cluster_assignments.index.isin(cell_id_list)]
# Count the distinct expression domains present in the LP spike
unique_clusters_count = cluster_VGN1b6['cluster'].nunique()
print("Number of unique clusters in LP inflorescence :", unique_clusters_count)
Number of unique clusters in LP inflorescence : 14
# Total cell count per cluster; value_counts() sorts descending, so the
# cumulative sum below walks from the largest domain down to the smallest.
cluster_counts = cluster_VGN1b6['cluster'].value_counts()
cluster_counts = cluster_counts[cluster_counts > 0]  # Filter out clusters with 0 cells
# Cumulative fraction of cells covered by the top-N domains
cumulative_percentage = cluster_counts.cumsum() / cluster_counts.sum()
# enumerate(..., start=1) replaces the manual range/iloc indexing
for i, cum_frac in enumerate(cumulative_percentage, start=1):
    percentage = cum_frac * 100
    print(f"The top {i} domains account for {percentage:.2f}% of cells.")
The top 1 domains account for 31.13% of cells. The top 2 domains account for 54.58% of cells. The top 3 domains account for 67.66% of cells. The top 4 domains account for 76.46% of cells. The top 5 domains account for 82.52% of cells. The top 6 domains account for 87.40% of cells. The top 7 domains account for 90.96% of cells. The top 8 domains account for 94.07% of cells. The top 9 domains account for 96.45% of cells. The top 10 domains account for 98.79% of cells. The top 11 domains account for 99.27% of cells. The top 12 domains account for 99.64% of cells. The top 13 domains account for 99.88% of cells. The top 14 domains account for 100.00% of cells.
how many cells at leaf ridge base of LDR sample VGN1e1 compared to leaf ridge apex?¶
# Cell-group labels for leaf ridges (LRs) and spikelet ridges (SRs),
# numbered 1-13.  The groupings were made in the MERSCOPE Visualiser tool
# and exported to a .csv file.
cellgroups = pd.read_csv('latedoubleridge_expressionanalysis/VGN1e_region1_cell_categoriesv3.csv', header=0, index_col=0)
cellgroups = cellgroups[['Custom cell groups']].copy()
# Count the cells in the basal leaf ridges (LR1-LR4).
# value_counts() is computed once and reused, instead of once per group.
group_counts = cellgroups['Custom cell groups'].value_counts()
lr_counts = []
for group in ('LR1', 'LR2', 'LR3', 'LR4'):
    count = int(group_counts.get(group, 0))
    print(f"Number of entries under '{group}':", count)
    lr_counts.append(count)
# Mean cells per section
average_count = sum(lr_counts) / len(lr_counts)
# Population standard deviation (divide by N, not N-1)
variance = sum((x - average_count) ** 2 for x in lr_counts) / len(lr_counts)
std_dev = math.sqrt(variance)
# Format the result
print(f"Averaging {average_count:.2f} ± {std_dev:.2f} cells per section.")
Number of entries under 'LR1': 58 Number of entries under 'LR2': 32 Number of entries under 'LR3': 16 Number of entries under 'LR4': 24 Averaging 32.50 ± 15.77 cells per section.
# Count the cells in the apical leaf ridges (LR8-LR11).
# value_counts() is computed once and reused, instead of once per group.
group_counts = cellgroups['Custom cell groups'].value_counts()
lr_counts = []
for group in ('LR8', 'LR9', 'LR10', 'LR11'):
    count = int(group_counts.get(group, 0))
    print(f"Number of entries under '{group}':", count)
    lr_counts.append(count)
# Mean cells per section
average_count = sum(lr_counts) / len(lr_counts)
# Population standard deviation (divide by N, not N-1)
variance = sum((x - average_count) ** 2 for x in lr_counts) / len(lr_counts)
std_dev = math.sqrt(variance)
# Format the result
print(f"Averaging {average_count:.2f} ± {std_dev:.2f} cells per section.")
Number of entries under 'LR8': 15 Number of entries under 'LR9': 12 Number of entries under 'LR10': 12 Number of entries under 'LR11': 11 Averaging 12.50 ± 1.50 cells per section.
Performing PCA analysis on SRs and LRs¶
# Gene-name annotations (gene ID index -> 'annotated_name' column), used
# below to label correlation tables and heatmap rows
file_path = 'annotated_names_12_24.csv'
annotation = pd.read_csv(file_path, header = 0, index_col=0)
First I will extract normalised expression values, and average them within each leaf ridge or spikelet ridge group
(Supplementary File 14)
#Read in the cleaned/filtered adata objects for my samples
# Filenames of the saved (cleaned/filtered) AnnData objects
adata_filenames = {
    "adata_VGN1e1": "qc/adata_VGN1e1_clean.h5ad"
}
# Load each file into a dictionary keyed by the object name
loaded_adata_objects = {}
for name, filename in adata_filenames.items():
    loaded_adata_objects[name] = sc.read_h5ad(filename)
    # Report the actual path loaded (the literal "(unknown)" in the
    # previous version was a garbled {filename} placeholder)
    print(f"Loaded {name} from {filename}")
# Convenience alias for the loaded AnnData object
adata_VGN1e1 = loaded_adata_objects["adata_VGN1e1"]
Loaded adata_VGN1e1 from qc/adata_VGN1e1_clean.h5ad
# Pull the normalised expression matrix out of adata_VGN1e1, densifying it
# if it is stored as a scipy sparse matrix
expr = adata_VGN1e1.X
X_dense = expr.toarray() if isinstance(expr, scipy.sparse.spmatrix) else expr
# Build a DataFrame with cells as rows and genes as columns
gene_expression_matrix = pd.DataFrame(
    data=X_dense,
    index=adata_VGN1e1.obs_names,
    columns=adata_VGN1e1.var_names,
)
# Merge the cell groups with the gene expression matrix.
# Step 1: make sure both indexes are strings so they can be matched
gene_expression_matrix.index = gene_expression_matrix.index.astype(str)
cellgroups.index = cellgroups.index.astype(str)
# Step 2: keep only cells that have a group assignment
filtered_gene_expression = gene_expression_matrix[gene_expression_matrix.index.isin(cellgroups.index)]
# Step 3: combine the two dataframes (matching by cell index)
combined_df = filtered_gene_expression.join(cellgroups, how='inner')
# Average expression score per cell group (LRs and SRs)
average_gene_expression_per_group = combined_df.groupby('Custom cell groups').mean()
# Keep only genes with at least one group-average expression above 0.30
filtered_average_gene_expression_per_group = average_gene_expression_per_group.loc[:, (average_gene_expression_per_group > 0.30).any()]
# Saving as supplementary table JK.  The index (LR/SR group labels) is
# written out as the first column; the previous index=False silently
# dropped the group names from the saved table.
filtered_average_gene_expression_per_group.to_csv("latedoubleridge_expressionanalysis/Averaged_NormalisedCountsperCell_SRLRgroups.csv")
filtered_average_gene_expression_per_group
| TraesCS1A02G306300 | TraesCS6A02G110100 | TraesCS4D02G017800 | TraesCS5A02G401800 | TraesCS6A02G335900 | TraesCS5B02G246700 | TraesCS4B02G064000 | TraesCS5B02G396600 | TraesCS2A02G174300 | TraesCS3D02G284200 | ... | TraesCS7A02G165600 | TraesCS7A02G292900 | TraesCS2B02G419200 | TraesCS6A02G373500 | TraesCS2B02G260800 | TraesCS1B02G283900 | TraesCS6A02G213700 | TraesCS2A02G323500 | TraesCS2B02G274200 | TraesCS1B02G042200 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Custom cell groups | |||||||||||||||||||||
| LR1 | 0.592195 | 0.312819 | 0.279014 | 0.068456 | 1.158507 | 0.453646 | 0.612996 | 0.643798 | 0.779635 | 0.102024 | ... | 0.559794 | 0.464847 | 0.070222 | 0.458490 | 0.818654 | 0.333066 | 0.156476 | 0.295254 | 0.510172 | 0.310218 |
| LR10 | 0.166783 | 0.116201 | 0.078862 | 0.906650 | 1.153218 | 0.344580 | 0.421055 | 0.231502 | 0.404571 | 0.990903 | ... | 0.050376 | 0.241367 | 0.226791 | 0.258849 | 0.965346 | 0.323857 | 0.050376 | 0.089797 | 0.554862 | 0.220196 |
| LR11 | 0.684832 | 0.273548 | 0.195358 | 0.593503 | 1.585243 | 0.406452 | 0.264138 | 0.634151 | 0.665057 | 1.176608 | ... | 0.000000 | 0.332223 | 0.117457 | 0.192981 | 1.054529 | 0.330327 | 0.097639 | 0.394213 | 0.407155 | 0.477761 |
| LR12 | 0.600684 | 0.128895 | 0.000000 | 0.710492 | 1.236984 | 0.299100 | 0.464234 | 0.296808 | 0.589380 | 1.459936 | ... | 0.081294 | 0.395374 | 0.251813 | 0.290275 | 1.173451 | 0.300095 | 0.114797 | 0.375703 | 0.638848 | 0.234809 |
| LR13 | 0.360574 | 0.389003 | 0.512968 | 0.841780 | 1.077243 | 0.437985 | 0.221659 | 0.449723 | 0.523461 | 1.443846 | ... | 0.046919 | 0.375277 | 0.076956 | 0.366786 | 1.291216 | 0.471330 | 0.089525 | 0.351535 | 0.505796 | 0.496151 |
| LR2 | 0.586304 | 0.261049 | 0.203035 | 0.036374 | 1.514485 | 0.472151 | 0.370508 | 0.798039 | 0.880551 | 0.207380 | ... | 0.513900 | 0.420392 | 0.041590 | 0.562996 | 1.259274 | 0.491463 | 0.185636 | 0.369325 | 0.514158 | 0.184136 |
| LR3 | 0.465635 | 0.170759 | 0.419600 | 0.044473 | 1.276095 | 0.581547 | 0.314242 | 0.460979 | 1.026664 | 0.408424 | ... | 0.353437 | 0.490555 | 0.048394 | 0.595601 | 1.121210 | 0.230566 | 0.183697 | 0.464794 | 0.289716 | 0.292080 |
| LR4 | 0.636107 | 0.206557 | 0.304616 | 0.312261 | 1.240007 | 0.514632 | 0.201142 | 0.616338 | 0.912581 | 0.452769 | ... | 0.210910 | 0.486442 | 0.094556 | 0.545327 | 0.631140 | 0.240997 | 0.097232 | 0.256507 | 0.538351 | 0.336128 |
| LR5 | 0.467356 | 0.537221 | 0.268395 | 0.299504 | 1.012439 | 0.164420 | 0.238765 | 0.400791 | 0.926303 | 0.745252 | ... | 0.130204 | 0.248313 | 0.128408 | 0.727289 | 0.628895 | 0.351412 | 0.229150 | 0.509813 | 0.197020 | 0.172071 |
| LR6 | 0.425357 | 0.468046 | 0.295317 | 0.738063 | 0.994177 | 0.277248 | 0.444296 | 0.469547 | 0.608873 | 0.600132 | ... | 0.034128 | 0.289565 | 0.143560 | 0.535991 | 0.548519 | 0.455494 | 0.110294 | 0.267189 | 0.716573 | 0.522119 |
| LR7 | 0.672517 | 0.359510 | 0.345488 | 0.647262 | 0.592940 | 0.468295 | 0.142851 | 0.509507 | 0.875352 | 0.767558 | ... | 0.120558 | 0.439557 | 0.138403 | 0.560273 | 1.289568 | 0.513256 | 0.323215 | 0.289336 | 0.554548 | 0.236732 |
| LR8 | 0.420459 | 0.213227 | 0.346802 | 0.889216 | 1.144970 | 0.399188 | 0.198025 | 0.718577 | 0.869530 | 0.879107 | ... | 0.127137 | 0.323299 | 0.278951 | 0.520026 | 0.655878 | 0.331321 | 0.059585 | 0.261337 | 0.449342 | 0.271909 |
| LR9 | 0.225574 | 0.224766 | 0.111285 | 0.953991 | 0.999519 | 0.375255 | 0.334924 | 0.505513 | 0.584744 | 0.822950 | ... | 0.141353 | 0.261631 | 0.158985 | 0.342039 | 0.996309 | 0.372978 | 0.070115 | 0.272225 | 0.564103 | 0.538607 |
| SR1 | 0.541430 | 0.336418 | 0.197737 | 0.046988 | 0.810848 | 0.219183 | 0.581749 | 0.608813 | 1.002117 | 0.338605 | ... | 0.077056 | 0.416653 | 0.019080 | 0.624031 | 1.051007 | 0.282724 | 0.198466 | 0.443623 | 0.559018 | 0.343402 |
| SR10 | 0.504564 | 0.300212 | 0.233929 | 0.568850 | 1.134189 | 0.219254 | 0.909579 | 0.489153 | 0.711968 | 0.944986 | ... | 0.007651 | 0.526659 | 0.153993 | 0.262082 | 1.792888 | 0.322275 | 0.483637 | 0.272206 | 0.615958 | 0.328433 |
| SR11 | 0.511758 | 0.232318 | 0.136268 | 0.551260 | 1.214901 | 0.294310 | 0.673875 | 0.349093 | 0.547173 | 1.059040 | ... | 0.043537 | 0.296001 | 0.150612 | 0.174503 | 1.874192 | 0.338145 | 0.443520 | 0.424120 | 0.531229 | 0.164711 |
| SR12 | 0.606333 | 0.299601 | 0.228888 | 0.271189 | 1.713483 | 0.208211 | 0.955955 | 0.467181 | 0.599467 | 1.163172 | ... | 0.035083 | 0.626727 | 0.128736 | 0.186526 | 1.589302 | 0.372941 | 0.306355 | 0.500446 | 0.600726 | 0.533385 |
| SR13 | 0.578468 | 0.426859 | 0.405973 | 0.230471 | 1.233838 | 0.303751 | 0.729218 | 0.447259 | 0.608088 | 1.008653 | ... | 0.053350 | 0.449563 | 0.150293 | 0.318937 | 2.314387 | 0.351817 | 0.375359 | 0.692622 | 0.617121 | 0.487284 |
| SR2 | 0.709433 | 0.459145 | 0.360103 | 0.212265 | 0.532793 | 0.195809 | 0.476331 | 0.882747 | 0.691032 | 0.396903 | ... | 0.000000 | 0.614136 | 0.217527 | 0.397955 | 1.391082 | 0.659768 | 0.132309 | 0.413107 | 0.748565 | 0.475798 |
| SR3 | 0.597196 | 0.291229 | 0.469252 | 0.141653 | 0.653141 | 0.136165 | 0.372900 | 0.762625 | 0.671649 | 0.973531 | ... | 0.000000 | 0.438778 | 0.152679 | 0.470285 | 1.355782 | 0.578515 | 0.286816 | 0.485431 | 0.529410 | 0.493056 |
| SR4 | 0.674868 | 0.591478 | 0.410552 | 0.120234 | 0.261971 | 0.479887 | 0.143048 | 0.857778 | 0.684799 | 0.525866 | ... | 0.000000 | 0.468355 | 0.356722 | 0.198945 | 1.129159 | 0.498011 | 0.400871 | 0.481583 | 0.737899 | 0.638620 |
| SR5 | 0.644822 | 0.291122 | 0.281604 | 0.238422 | 0.596421 | 0.367484 | 0.391209 | 0.593133 | 0.739747 | 0.647120 | ... | 0.070920 | 0.446879 | 0.126375 | 0.417458 | 1.514535 | 0.318517 | 0.256539 | 0.452548 | 0.761179 | 0.650596 |
| SR6 | 0.620145 | 0.400137 | 0.178400 | 0.278680 | 0.169842 | 0.412977 | 0.429417 | 0.528880 | 0.590553 | 0.759097 | ... | 0.045354 | 0.496196 | 0.343400 | 0.251242 | 1.650541 | 0.524570 | 0.291340 | 0.431733 | 0.575588 | 0.520623 |
| SR7 | 0.554810 | 0.381108 | 0.273648 | 0.211025 | 0.556922 | 0.459326 | 0.521219 | 0.692431 | 0.523594 | 0.804739 | ... | 0.035669 | 0.526665 | 0.164792 | 0.347022 | 1.874347 | 0.308721 | 0.424085 | 0.460338 | 0.627631 | 0.449003 |
| SR8 | 0.524945 | 0.268547 | 0.189305 | 0.389974 | 0.655341 | 0.202396 | 0.284384 | 0.520263 | 0.569340 | 0.944087 | ... | 0.049903 | 0.366975 | 0.477089 | 0.203036 | 1.623324 | 0.267303 | 0.227743 | 0.355108 | 0.441977 | 0.304133 |
| SR9 | 0.590473 | 0.325358 | 0.200954 | 0.686311 | 1.192252 | 0.112588 | 0.647013 | 0.560169 | 0.504554 | 0.784947 | ... | 0.022981 | 0.659514 | 0.222377 | 0.132615 | 1.554607 | 0.547129 | 0.269839 | 0.607503 | 0.493175 | 0.074956 |
26 rows × 87 columns
creating PCA plot for leaf ridges only¶
plt.style.use('default')
# Keep only leaf-ridge groups: drop the spikelet-ridge ('SR') rows
filtered_data = filtered_average_gene_expression_per_group[
    ~filtered_average_gene_expression_per_group.index.str.startswith('SR')]
# Standardise the per-group averages, then reduce to two components
scaled_data = StandardScaler().fit_transform(filtered_data)
pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_data)
# Explained variance for the axis labels
explained_variance = pca.explained_variance_ratio_
# Flip the sign of both components (cosmetic: orients the plot; PCA signs
# are arbitrary)
pca_result *= -1
pca_df = pd.DataFrame(pca_result, columns=['PC1', 'PC2'], index=filtered_data.index)
# Plot with PC2 on the x-axis and PC1 on the y-axis
plt.figure(figsize=(4.5, 10))
texts = []  # text objects handed to adjustText afterwards
for group_label, row in pca_df.iterrows():
    plt.scatter(row['PC2'], row['PC1'], color='#7F7F7F', s=200)
    # Small offset so the label does not sit on top of the marker
    texts.append(plt.text(row['PC2'] + 0.10, row['PC1'] + 0.10, group_label, fontsize=18))
# Nudge labels apart to avoid overlap
adjust_text(texts, only_move={'points': 'y', 'texts': 'y'}, arrowprops=dict(arrowstyle='-', color='gray'))
plt.ylabel(f'Principal Component 1 ({explained_variance[0] * 100:.2f}% Variance)', fontsize=13)
plt.xlabel(f'Principal Component 2 ({explained_variance[1] * 100:.2f}% Variance)', fontsize=10)
plt.tick_params(axis='x', labelsize=18)
plt.tick_params(axis='y', labelsize=20)
# Extend the x-axis slightly so labels are not clipped
x_min, x_max = plt.xlim()
plt.xlim(x_min - 0.2, x_max + 0.3)
plt.grid(False)
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_LRPCA.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
# Correlation of every gene with PC1 across the LR groups
correlations = filtered_data.corrwith(pca_df['PC1'])
# Saved to build a supplementary table, combined later with the SR scores.
# (A redundant intermediate DataFrame that was immediately overwritten has
# been removed.)
correlations_LR = correlations.reset_index()
correlations_LR.columns = ['geneid', 'Corr_PC1_LRs']
# Strongest correlations by absolute value (both positive and negative)
top_correlations_LRs = correlations.abs().sort_values(ascending=False).head(25)
# Keep the signed correlation values for the selected genes
top_correlations_LRs_df = pd.DataFrame({'Gene': top_correlations_LRs.index, 'Correlation': correlations[top_correlations_LRs.index]})
# Attach human-readable gene names
top_correlations_LRs_df = pd.merge(top_correlations_LRs_df, annotation, left_on='Gene', right_index=True, how='inner')
top_correlations_LRs_df
| Gene | Correlation | annotated_name | |
|---|---|---|---|
| TraesCS5A02G230500 | TraesCS5A02G230500 | 0.950820 | TraesCS5A02G230500 |
| TraesCS1D02G162600 | TraesCS1D02G162600 | 0.944911 | TraesCS1D02G162600_TaYABBY1D |
| TraesCS6A02G287300 | TraesCS6A02G287300 | -0.933733 | TraesCS6A02G287300_OsLEC1 |
| TraesCS5A02G473800 | TraesCS5A02G473800 | -0.932904 | TraesCS5A02G473800_AP2-5 |
| TraesCS5A02G401800 | TraesCS5A02G401800 | 0.931758 | TraesCS5A02G401800_OsDof15 |
| TraesCS7A02G165600 | TraesCS7A02G165600 | -0.921788 | TraesCS7A02G165600_OsGRF2 |
| TraesCS4A02G191300 | TraesCS4A02G191300 | -0.917319 | TraesCS4A02G191300_OsSCR2 |
| TraesCS6B02G144000 | TraesCS6B02G144000 | -0.902893 | TraesCS6B02G144000_OseIF-4A |
| TraesCS1D02G343400 | TraesCS1D02G343400 | -0.895633 | TraesCS1D02G343400_TaPEL3 |
| TraesCS3D02G284200 | TraesCS3D02G284200 | 0.894141 | TraesCS3D02G284200_OsMADS32 |
| TraesCS5A02G265900 | TraesCS5A02G265900 | -0.889074 | TraesCS5A02G265900_TaSPL17 |
| TraesCS7A02G308400 | TraesCS7A02G308400 | -0.882417 | TraesCS7A02G308400_OsROC7t |
| TraesCS2B02G464200 | TraesCS2B02G464200 | 0.831832 | TraesCS2B02G464200_TaLFY2 |
| TraesCS3D02G357400 | TraesCS3D02G357400 | -0.825713 | TraesCS3D02G357400_OsqSH1 |
| TraesCS4D02G245300 | TraesCS4D02G245300 | 0.814378 | TraesCS4D02G245300_TaYABBY4D |
| TraesCS5B02G507300 | TraesCS5B02G507300 | 0.790324 | TraesCS5B02G507300 |
| TraesCS2D02G256600 | TraesCS2D02G256600 | -0.786988 | TraesCS2D02G256600_OsPLT8 |
| TraesCS7A02G313100 | TraesCS7A02G313100 | -0.770056 | TraesCS7A02G313100_OsGAPC3 |
| TraesCS7D02G191600 | TraesCS7D02G191600 | 0.748314 | TraesCS7D02G191600_TaPIN1 |
| TraesCS2A02G174300 | TraesCS2A02G174300 | -0.745112 | TraesCS2A02G174300_FUL3_OsMADS18 |
| TraesCS6A02G313800 | TraesCS6A02G313800 | -0.741879 | TraesCS6A02G313800_SVP1_OsMADS22 |
| TraesCS1A02G199600 | TraesCS1A02G199600 | 0.718001 | TraesCS1A02G199600_OsMADS56 |
| TraesCS3A02G441700 | TraesCS3A02G441700 | -0.690393 | TraesCS3A02G441700 |
| TraesCS4D02G296400 | TraesCS4D02G296400 | -0.688688 | TraesCS4D02G296400 |
| TraesCS1D02G197300 | TraesCS1D02G197300 | -0.673159 | TraesCS1D02G197300_OsROC3t |
# Leaf-ridge cell groups, ordered from spike base (LR1) to apex (LR13)
categories = ['LR1', 'LR2', 'LR3', 'LR4', 'LR5', 'LR6', 'LR7', 'LR8', 'LR9',
              'LR10', 'LR11', 'LR12', 'LR13']
x = np.arange(len(categories))  # numeric positions of the cell groups

def _zscore_and_trend(gene_ids):
    # Z-score each gene across the LR groups, smooth the per-group mean
    # (Savitzky-Golay, window 5, order 2) and fit a cubic trend to it.
    per_gene = filtered_average_gene_expression_per_group.loc[categories, gene_ids].T
    per_gene_z = per_gene.apply(zscore, axis=1)
    smoothed_mean = savgol_filter(per_gene_z.mean(axis=0), window_length=5, polyorder=2)
    trend = np.polyval(np.polyfit(x, smoothed_mean, 3), x)
    return per_gene_z, trend

# Genes strongly positively correlated with PC1
gene_list_pos = top_correlations_LRs_df[top_correlations_LRs_df['Correlation'] > 0.80]['Gene'].tolist()
lineplot_data_zscore_pos, poly_fit_pos = _zscore_and_trend(gene_list_pos)
# Genes strongly negatively correlated with PC1
gene_list_neg = top_correlations_LRs_df[top_correlations_LRs_df['Correlation'] < -0.80]['Gene'].tolist()
lineplot_data_zscore_neg, poly_fit_neg = _zscore_and_trend(gene_list_neg)

# Plot: expression on the x-axis, cell groups on the y-axis
plt.figure(figsize=(8, 10))
# Individual smoothed gene profiles (positive correlations first)
for row_name in lineplot_data_zscore_pos.index:
    smoothed_values = savgol_filter(lineplot_data_zscore_pos.loc[row_name, categories],
                                    window_length=5, polyorder=2)
    plt.plot(smoothed_values, categories, label=f'{row_name} (Positive)', linewidth=2, color='grey', alpha=0.4)
# Then the negative correlations
for row_name in lineplot_data_zscore_neg.index:
    smoothed_values = savgol_filter(lineplot_data_zscore_neg.loc[row_name, categories],
                                    window_length=5, polyorder=2)
    plt.plot(smoothed_values, categories, label=f'{row_name} (Negative)', linewidth=2, color='grey', alpha=0.4)
# Cubic trend lines drawn on top
plt.plot(poly_fit_pos, categories, color='dimgrey', linestyle='-', linewidth=4, label='Positive Trend Line')
plt.plot(poly_fit_neg, categories, color='dimgrey', linestyle='-', linewidth=4, label='Negative Trend Line')
plt.tick_params(axis='x', labelsize=17)
plt.tick_params(axis='y', labelsize=20)
plt.xlabel('Z-Score Normalized Expression', size=20)
plt.yticks(rotation=0)
plt.legend(title="Trends", bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_LR_gradients.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
plt.figure(figsize=(10, 2.5))
# LR groups in base-to-apex order along the x-axis
categories = ['LR1', 'LR2', 'LR3', 'LR4', 'LR5', 'LR6', 'LR7', 'LR8', 'LR9',
              'LR10', 'LR11', 'LR12', 'LR13']
# Selected marker genes to display
genes_to_plot = ['TraesCS6A02G287300',
                 'TraesCS5A02G265900', 'TraesCS5A02G473800', 'TraesCS3D02G357400',
                 'TraesCS3D02G284200', 'TraesCS1D02G162600', 'TraesCS4D02G245300'
                 ]
# Genes on the y-axis, cell groups on the x-axis; z-score each gene row
selected = filtered_average_gene_expression_per_group.loc[categories, genes_to_plot].T
heatmap_data_zscore = selected.apply(zscore, axis=1)
# Replace gene IDs with their annotated names for the row labels
heatmap_data_zscore_Annotated = heatmap_data_zscore.merge(
    annotation, left_index=True, right_index=True, how='left').set_index('annotated_name')
ax = sns.heatmap(heatmap_data_zscore_Annotated, cmap='viridis', linewidths=0.0)
plt.title('Mirrored Hierarchical Clustering of Genes Across Cell Groups')
# Tune tick label sizes / rotation
ax.set_yticklabels(ax.get_yticklabels(), fontsize=8)
ax.set_xticklabels(ax.get_xticklabels(), rotation=270, ha='center', fontsize=10)
plt.tick_params(axis='x', labelsize=16)
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_LR_heatmap.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
creating PCA plot for spikelet ridges only¶
plt.style.use('default')
# Keep only spikelet-ridge groups: drop the leaf-ridge ('LR') rows
filtered_data = filtered_average_gene_expression_per_group[
    ~filtered_average_gene_expression_per_group.index.str.startswith('LR')]
# Standardise the per-group averages, then reduce to two components
scaled_data = StandardScaler().fit_transform(filtered_data)
pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_data)
# Explained variance for the axis labels
explained_variance = pca.explained_variance_ratio_
# Flip the sign of both components (cosmetic: orients the plot; PCA signs
# are arbitrary)
pca_result *= -1
pca_df = pd.DataFrame(pca_result, columns=['PC1', 'PC2'], index=filtered_data.index)
# Plot with PC2 on the x-axis and PC1 on the y-axis
plt.figure(figsize=(4.5, 10))
texts = []  # text objects handed to adjustText afterwards
for group_label, row in pca_df.iterrows():
    plt.scatter(row['PC2'], row['PC1'], color='#7F7F7F', s=200)
    # Small offset so the label does not sit on top of the marker
    texts.append(plt.text(row['PC2'] + 0.10, row['PC1'] + 0.10, group_label, fontsize=18))
# Nudge labels apart to avoid overlap
adjust_text(texts, only_move={'points': 'y', 'texts': 'y'}, arrowprops=dict(arrowstyle='-', color='gray'))
plt.ylabel(f'Principal Component 1 ({explained_variance[0] * 100:.2f}% Variance)', fontsize=13)
plt.xlabel(f'Principal Component 2 ({explained_variance[1] * 100:.2f}% Variance)', fontsize=10)
plt.tick_params(axis='x', labelsize=18)
plt.tick_params(axis='y', labelsize=20)
# Extend the x-axis slightly so labels are not clipped
x_min, x_max = plt.xlim()
plt.xlim(x_min - 0.2, x_max + 0.3)
plt.grid(False)
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_SRPCA.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
# Correlation of every gene with PC1 across the SR groups
correlations = filtered_data.corrwith(pca_df['PC1'])
# Saved to build a supplementary table, combined later with the LR scores.
# (A redundant intermediate DataFrame that was immediately overwritten has
# been removed.)
correlations_SR = correlations.reset_index()
correlations_SR.columns = ['geneid', 'Corr_PC1_SRs']
# Strongest correlations by absolute value (both positive and negative)
top_correlations_SRs = correlations.abs().sort_values(ascending=False).head(25)
# Keep the signed correlation values for the selected genes
top_correlations_SRs_df = pd.DataFrame({'Gene': top_correlations_SRs.index, 'Correlation': correlations[top_correlations_SRs.index]})
# Attach human-readable gene names
top_correlations_SRs_df = pd.merge(top_correlations_SRs_df, annotation, left_on='Gene', right_index=True, how='inner')
top_correlations_SRs_df
| Gene | Correlation | annotated_name | |
|---|---|---|---|
| TraesCS3D02G357400 | TraesCS3D02G357400 | -0.922572 | TraesCS3D02G357400_OsqSH1 |
| TraesCS5D02G449200 | TraesCS5D02G449200 | 0.914028 | TraesCS5D02G449200 |
| TraesCS6A02G373500 | TraesCS6A02G373500 | -0.890685 | TraesCS6A02G373500_OsbHLH107 |
| TraesCS7A02G246500 | TraesCS7A02G246500 | -0.890351 | TraesCS7A02G246500_TaSPL14 |
| TraesCS4A02G123800 | TraesCS4A02G123800 | 0.877349 | TraesCS4A02G123800_TaAIL6 |
| TraesCS6D02G248300 | TraesCS6D02G248300 | -0.872818 | TraesCS6D02G248300_OsKANADI1 |
| TraesCS4A02G256700 | TraesCS4A02G256700 | 0.867609 | TraesCS4A02G256700_TaKNOX5 |
| TraesCS5A02G549700 | TraesCS5A02G549700 | 0.843138 | TraesCS5A02G549700_OsHOX10 |
| TraesCS4A02G016000 | TraesCS4A02G016000 | 0.828829 | TraesCS4A02G016000_TaILI3 |
| TraesCS3A02G246000 | TraesCS3A02G246000 | 0.822369 | TraesCS3A02G246000_OsARF2 |
| TraesCS2A02G174300 | TraesCS2A02G174300 | -0.819826 | TraesCS2A02G174300_FUL3_OsMADS18 |
| TraesCS5B02G047200 | TraesCS5B02G047200 | 0.819488 | TraesCS5B02G047200_OsHOX33 |
| TraesCS5A02G265900 | TraesCS5A02G265900 | -0.783418 | TraesCS5A02G265900_TaSPL17 |
| TraesCS5A02G401800 | TraesCS5A02G401800 | 0.767250 | TraesCS5A02G401800_OsDof15 |
| TraesCS7D02G246100 | TraesCS7D02G246100 | -0.746511 | TraesCS7D02G246100_OsCUC3 |
| TraesCS5A02G230500 | TraesCS5A02G230500 | 0.735964 | TraesCS5A02G230500 |
| TraesCS1A02G241400 | TraesCS1A02G241400 | 0.732095 | TraesCS1A02G241400_OsUBQ7 |
| TraesCS5B02G507300 | TraesCS5B02G507300 | -0.724171 | TraesCS5B02G507300 |
| TraesCS2D02G256600 | TraesCS2D02G256600 | -0.724119 | TraesCS2D02G256600_OsPLT8 |
| TraesCS2B02G281000 | TraesCS2B02G281000 | -0.694769 | TraesCS2B02G281000_FUL2_OsMADS15 |
| TraesCS7D02G342300 | TraesCS7D02G342300 | -0.690556 | TraesCS7D02G342300_OsCUC1 |
| TraesCS7D02G191600 | TraesCS7D02G191600 | 0.689305 | TraesCS7D02G191600_TaPIN1 |
| TraesCS5B02G396700 | TraesCS5B02G396700 | -0.665299 | TraesCS5B02G396700_SEP1-6_OsMADS34 |
| TraesCS3A02G441700 | TraesCS3A02G441700 | -0.664007 | TraesCS3A02G441700 |
| TraesCS1D02G075700 | TraesCS1D02G075700 | -0.648462 | TraesCS1D02G075700_TaKNOX3 |
# Cell groups ordered along the spikelet-ridge axis.
categories = [f'SR{i}' for i in range(1, 14)]
x = np.arange(len(categories))  # numeric positions for the polynomial fit

# --- Genes strongly positively correlated with PC1 (> 0.80) ---
filtered_df_pos = top_correlations_SRs_df.loc[top_correlations_SRs_df['Correlation'] > 0.80]
gene_list_pos = list(filtered_df_pos['Gene'])
filtered_data_pos = filtered_average_gene_expression_per_group.loc[categories, gene_list_pos]
lineplot_data_pos = filtered_data_pos.transpose()  # genes on rows
lineplot_data_zscore_pos = lineplot_data_pos.apply(zscore, axis=1)
# Average z-score profile, smoothed, then summarised by a cubic trend line.
average_trend_pos = lineplot_data_zscore_pos.mean(axis=0)
smoothed_average_trend_pos = savgol_filter(average_trend_pos, window_length=5, polyorder=2)
coeffs_pos = np.polyfit(x, smoothed_average_trend_pos, 3)
poly_fit_pos = np.polyval(coeffs_pos, x)

# --- Genes strongly negatively correlated with PC1 (< -0.80) ---
filtered_df_neg = top_correlations_SRs_df.loc[top_correlations_SRs_df['Correlation'] < -0.80]
gene_list_neg = list(filtered_df_neg['Gene'])
filtered_data_neg = filtered_average_gene_expression_per_group.loc[categories, gene_list_neg]
lineplot_data_neg = filtered_data_neg.transpose()
lineplot_data_zscore_neg = lineplot_data_neg.apply(zscore, axis=1)
average_trend_neg = lineplot_data_zscore_neg.mean(axis=0)
smoothed_average_trend_neg = savgol_filter(average_trend_neg, window_length=5, polyorder=2)
coeffs_neg = np.polyfit(x, smoothed_average_trend_neg, 3)
poly_fit_neg = np.polyval(coeffs_neg, x)
# One faint grey line per gene (z-scored, Savitzky-Golay smoothed) plus a
# thick cubic trend line per correlation sign. Expression is on the x-axis
# and the SR groups run up the categorical y-axis.
plt.figure(figsize=(8, 10))
# Smoothed per-gene lines for the positively correlated genes.
for row_name in lineplot_data_zscore_pos.index:
    smoothed_values = savgol_filter(lineplot_data_zscore_pos.loc[row_name, categories],
                                    window_length=5, polyorder=2)
    plt.plot(smoothed_values, categories, label=f'{row_name} (Positive)', linewidth=2, color='grey', alpha=0.4)
# Smoothed per-gene lines for the negatively correlated genes.
for row_name in lineplot_data_zscore_neg.index:
    smoothed_values = savgol_filter(lineplot_data_zscore_neg.loc[row_name, categories],
                                    window_length=5, polyorder=2)
    plt.plot(smoothed_values, categories, label=f'{row_name} (Negative)', linewidth=2, color='grey', alpha=0.4)
# Cubic-fit average trend for each sign group (computed in the previous cell).
plt.plot(poly_fit_pos, categories, color='dimgrey', linestyle='-', linewidth=4, label='Positive Trend Line')
plt.plot(poly_fit_neg, categories, color='dimgrey', linestyle='-', linewidth=4, label='Negative Trend Line')
plt.tick_params(axis='x', labelsize=17)
plt.tick_params(axis='y', labelsize=20)
plt.xlabel('Z-Score Normalized Expression', size = 20)
plt.yticks(rotation=0)
# Legend placed outside the axes; lists one entry per gene plus the trends.
plt.legend(title="Trends", bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_SR_gradients.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
plt.figure(figsize=(10, 2.5))
# Cell groups ordered along the spikelet-ridge axis.
categories = ['SR1', 'SR2', 'SR3', 'SR4', 'SR5', 'SR6', 'SR7', 'SR8', 'SR9',
              'SR10', 'SR11', 'SR12', 'SR13']
# Hand-picked marker genes to display.
genes_to_plot = ['TraesCS7A02G246500', 'TraesCS3D02G357400', 'TraesCS2A02G174300', 'TraesCS5A02G265900','TraesCS4A02G256700', 'TraesCS4A02G123800', 'TraesCS4A02G016000', 'TraesCS5A02G549700']
filtered_data = filtered_average_gene_expression_per_group.loc[categories, genes_to_plot]
# Genes on rows, cell groups on columns; z-score each gene across the groups.
heatmap_data = filtered_data.T
heatmap_data_zscore = heatmap_data.apply(zscore, axis=1)
# Swap raw gene IDs for annotated names on the y-axis.
heatmap_data_zscore_Annotated = pd.merge(heatmap_data_zscore, annotation, left_index=True, right_index=True, how='left')
heatmap_data_zscore_Annotated.set_index('annotated_name', inplace=True)
ax = sns.heatmap(heatmap_data_zscore_Annotated, cmap='viridis', linewidths=0.0)
# NOTE(review): no hierarchical clustering is performed in this cell — the
# title appears stale; confirm against the figure legend before publishing.
plt.title('Mirrored Hierarchical Clustering of Genes Across Cell Groups')
ax.set_yticklabels(ax.get_yticklabels(), fontsize=8)
ax.set_xticklabels(ax.get_xticklabels(), rotation=270, ha='center', fontsize=10)
plt.tick_params(axis='x', labelsize=16)
# BUG FIX: the original saved to VGN1e1_SR_gradients.svg, silently overwriting
# the gradient line plot written by the previous cell; save under its own name.
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_SR_heatmap.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
Generating a summary file of correlations to PC1 for both leaf ridges and spikelet ridges¶
I want to make a file that summarises the PC1 correlation scores for both the leaf ridge (LR) and spikelet ridge (SR) groupings
(Supplementary Table 15)
# Combine the SR and LR PC1 correlation scores into a single table keyed by
# gene ID (Supplementary Table 15) and write it out.
merged_correlations = pd.merge(correlations_SR, correlations_LR, on='geneid', how='inner')
merged_correlations.to_csv("latedoubleridge_expressionanalysis/PC1correlation_scores_SRLRgroups.csv", index=False)
Generating a summary file for all groupings used¶
I want to make one file that captures all cells in my sample, indicating which are considered inflorescence and which are used in the leaf ridge and spikelet ridge groupings
(Supplementary Table 13)
cellsinspike_categories = cellsinspike_VGN1e1.rename(columns={'cell_id': 'EntityID'})
# Flag every cell inside the spike as 'inflorescence'.
cellsinspike_categories['inflorescence_category'] = 'inflorescence'

# LR/SR grouping table: promote the index to an EntityID column and rename
# the grouping column.
cellgroups_categories = cellgroups.copy().reset_index().rename(
    columns={'index': 'EntityID', 'Custom cell groups': 'LRSRgrouping'})

# Restrict to the VGN1e1 sample and strip the '-<suffix>' from cell barcodes.
adata_VGN1e1 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e1'].copy()
adata_VGN1e1.obs.index = [index.split('-')[0] for index in adata_VGN1e1.obs.index]

# obs as a plain dataframe keyed by an EntityID column.
adata_VGN1e1_df = adata_VGN1e1.obs.reset_index().rename(columns={'index': 'EntityID'})

# Keep only the identifying columns, then left-join both category tables so
# every cell is retained even when it has no inflorescence / LRSR label.
filtered_adata_VGN1e1_df = adata_VGN1e1_df[['EntityID', 'dataset', 'clusters']]
merged_cellcategory_df = (
    filtered_adata_VGN1e1_df
    .merge(cellsinspike_categories, on='EntityID', how='left')
    .merge(cellgroups_categories, on='EntityID', how='left')
)
merged_cellcategory_df
merged_cellcategory_df.to_csv("latedoubleridge_expressionanalysis/CellCategories_VGN1e1_inflorescence_SRLRgroups.csv", index=False)
Making Visualisations of domain maps and transcripts¶
# Cell-boundary polygons (micron coordinates) and per-molecule transcript
# detections for region 1 of sample VGN1e1.
segmentation_df = gpd.read_parquet('cell_segmentation/VGN1e_region1_output/cellpose2_micron_space_VGN1e1.parquet')
transcripts_df = pd.read_csv('cell_segmentation/VGN1e_region1_output/detected_transcripts_VGN1e1.csv')

# Restrict to the VGN1e1 sample and strip the '-<suffix>' from cell barcodes.
adata_VGN1e1 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e1'].copy()
adata_VGN1e1.obs.index = [index.split('-')[0] for index in adata_VGN1e1.obs.index]

# obs as a plain dataframe with the barcode in a 'cell_id' column.
adata_VGN1e1_df = adata_VGN1e1.obs.reset_index()
adata_VGN1e1_df = adata_VGN1e1_df.rename(columns={'index': 'cell_id'})
# FIX: the original stripped whitespace twice — once with .str.strip() on the
# column and again per element on the list; a single pass is sufficient.
adata_VGN1e1_df['cell_id'] = adata_VGN1e1_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1e1_df['cell_id'].tolist()

# Cast EntityID to string so it compares against the barcode strings, then
# keep only segmentation rows belonging to retained cells.
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]
filtered_segmentation_df
| ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | |
|---|---|---|---|---|---|---|---|---|---|
| 48334 | 48334 | 2343479300017100075 | 1 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 3.0 | None |
| 48829 | 48829 | 2343479300017100075 | 6 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 10.5 | None |
| 48664 | 48664 | 2343479300017100075 | 0 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 1.5 | None |
| 48499 | 48499 | 2343479300017100075 | 5 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 9.0 | None |
| 48004 | 48004 | 2343479300017100075 | 2 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 4.5 | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28175 | 28175 | 2343479300098100014 | 2 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 4.5 | None |
| 28195 | 28195 | 2343479300098100014 | 0 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 1.5 | None |
| 28200 | 28200 | 2343479300098100014 | 6 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 10.5 | None |
| 28185 | 28185 | 2343479300098100014 | 1 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 3.0 | None |
| 28170 | 28170 | 2343479300098100014 | 3 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 6.0 | None |
15463 rows × 9 columns
# Make sure the segmentation table is a GeoDataFrame with 'Geometry' as the
# active geometry column.
filtered_segmentation_df = gpd.GeoDataFrame(filtered_segmentation_df, geometry='Geometry')

# Transcripts as points in the same global (micron) coordinate space.
transcripts_gdf = gpd.GeoDataFrame(
    transcripts_df,
    geometry=gpd.points_from_xy(transcripts_df['global_x'], transcripts_df['global_y']),
)

# Keep only transcripts whose point falls inside a retained cell polygon.
filtered_transcripts_gdf = gpd.sjoin(
    transcripts_gdf,
    filtered_segmentation_df[['EntityID', 'Geometry']],
    how='inner',
    predicate='within',
)

# Drop join bookkeeping columns, then duplicate hits (a cell appears once per
# z-slice in the segmentation, so one point can match several identical rows).
filtered_transcripts_df = (
    filtered_transcripts_gdf
    .drop(columns=['index_right', 'Unnamed: 0'], errors='ignore')
    .drop_duplicates()
)
filtered_transcripts_df.reset_index(drop=True, inplace=True)
filtered_transcripts_df
| barcode_id | global_x | global_y | global_z | x | y | fov | gene | transcript_id | cell_id | geometry | EntityID | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 38 | 3446.3018 | 7837.6090 | 1.0 | 388.12305 | 474.39932 | 75 | TraesCS3D02G284200 | TraesCS3D02G284200_1 | 2343479300095100291 | POINT (3446.302 7837.609) | 2343479300095100291 |
| 1 | 40 | 3444.9426 | 7839.1235 | 1.0 | 375.53894 | 488.42523 | 75 | TraesCS1A02G199600 | TraesCS1A02G199600_2 | 2343479300095100291 | POINT (3444.943 7839.123) | 2343479300095100291 |
| 2 | 70 | 3444.9240 | 7838.3220 | 1.0 | 375.36667 | 481.00000 | 75 | TraesCS7D02G246100 | TraesCS7D02G246100_1 | 2343479300095100291 | POINT (3444.924 7838.322) | 2343479300095100291 |
| 3 | 93 | 3442.4006 | 7835.5137 | 1.0 | 352.00000 | 455.00000 | 75 | TraesCS3B02G435700 | TraesCS3B02G435700_1 | 2343479300095100291 | POINT (3442.401 7835.514) | 2343479300095100291 |
| 4 | 98 | 3444.8670 | 7839.4214 | 1.0 | 374.83790 | 491.18338 | 75 | TraesCS4D02G076900 | TraesCS4D02G076900_1 | 2343479300095100291 | POINT (3444.867 7839.421) | 2343479300095100291 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 325696 | 129 | 4121.7773 | 6368.6880 | 6.0 | 971.40650 | 1758.59350 | 199 | TraesCS7A02G313100 | TraesCS7A02G313100_3 | 2343479300019100003 | POINT (4121.777 6368.688) | 2343479300019100003 |
| 325697 | 196 | 4116.4194 | 6367.1820 | 6.0 | 921.79400 | 1744.65250 | 199 | TraesCS2B02G260800 | TraesCS2B02G260800_1 | 2343479300019100003 | POINT (4116.419 6367.182) | 2343479300019100003 |
| 325698 | 196 | 4120.6953 | 6369.7456 | 6.0 | 961.38745 | 1768.38750 | 199 | TraesCS2B02G260800 | TraesCS2B02G260800_1 | 2343479300019100003 | POINT (4120.695 6369.746) | 2343479300019100003 |
| 325699 | 211 | 4115.4690 | 6368.1646 | 6.0 | 913.00000 | 1753.74580 | 199 | TraesCS2A02G114900 | TraesCS2A02G114900_1 | 2343479300019100003 | POINT (4115.469 6368.165) | 2343479300019100003 |
| 325700 | 245 | 4123.5693 | 6371.2160 | 6.0 | 988.00000 | 1782.00000 | 199 | TraesCS2A02G391100 | TraesCS2A02G391100_2 | 2343479300019100003 | POINT (4123.569 6371.216) | 2343479300019100003 |
325701 rows × 12 columns
# Per-cell cluster assignment, keyed by the (string) cell barcode.
adata_VGN1e1_df = adata_VGN1e1.obs[['clusters']].copy()
adata_VGN1e1_df.index = adata_VGN1e1_df.index.astype(str)
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
# Inner join: each segmentation row picks up its cell's cluster label.
merged_df = pd.merge(filtered_segmentation_df, adata_VGN1e1_df, left_on='EntityID', right_index=True)
merged_df
| ID | EntityID | ZIndex | Geometry | ParentType | ParentID | Type | ZLevel | Name | clusters | |
|---|---|---|---|---|---|---|---|---|---|---|
| 48334 | 48334 | 2343479300017100075 | 1 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 3.0 | None | 6 |
| 48829 | 48829 | 2343479300017100075 | 6 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 10.5 | None | 6 |
| 48664 | 48664 | 2343479300017100075 | 0 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 1.5 | None | 6 |
| 48499 | 48499 | 2343479300017100075 | 5 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 9.0 | None | 6 |
| 48004 | 48004 | 2343479300017100075 | 2 | MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... | None | None | cell | 4.5 | None | 6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28175 | 28175 | 2343479300098100014 | 2 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 4.5 | None | 12 |
| 28195 | 28195 | 2343479300098100014 | 0 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 1.5 | None | 12 |
| 28200 | 28200 | 2343479300098100014 | 6 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 10.5 | None | 12 |
| 28185 | 28185 | 2343479300098100014 | 1 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 3.0 | None | 12 |
| 28170 | 28170 | 2343479300098100014 | 3 | MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... | None | None | cell | 6.0 | None | 12 |
15463 rows × 10 columns
First I will make domain maps for Figure 5A-B
import numpy as np
import math
from shapely.geometry import Polygon, MultiPolygon
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
import matplotlib.patches as patches
# Fill colours for the clusters highlighted in Figure 5A-B; any other cluster
# is drawn unfilled and without an outline.
custom_cluster_colors = {
    '0': '#a70e62',
    '12': '#6ecad5',
    '3': '#f0b932',
    '11': '#fed2ff',
    '4': '#ffd95a'
}
default_color = 'none'

# Rotate the whole section by -22 degrees about the mean of the cell
# centroids so the region of interest sits upright in the figure.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Build one matplotlib Polygon patch per (rotated) cell outline, coloured by
# its cluster.
rotated_patches_list = []
for _, row in merged_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # keys in custom_cluster_colors are strings
    if cluster in custom_cluster_colors:
        color = custom_cluster_colors[cluster]
        edge_color = '#595959'  # outline for highlighted clusters
    else:
        color = default_color
        edge_color = 'none'  # unhighlighted cells are invisible
    # Treat a single Polygon as a one-element collection so both geometry
    # types share one code path (the original duplicated this block verbatim).
    if isinstance(shape, MultiPolygon):
        geoms = shape.geoms
    elif isinstance(shape, Polygon):
        geoms = [shape]
    else:
        geoms = []
    for poly in geoms:
        centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        # FIX: dropped the unused `rotated_poly = Polygon(rotated_coords)` —
        # the patch is built straight from the rotated coordinate array.
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)
# Draw the rotated, cluster-coloured cell outlines.
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# FIX: removed the unused min/max extent computation — the view window below
# is hard-coded to the basal region of interest.
ax.set_ylim(6950, 7210)
ax.set_xlim(3950, 4110)
ax.set_aspect('equal')
# 50-micron scale bar (the original comment said "25-micron"; the length used
# here is 50, matching the output filename).
scale_bar_length = 50  # Microns
x_start = 3955  # Starting x-coordinate of the scale bar
y_start = 6955  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
# Hide all ticks and labels for a clean anatomical map.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_domainmap_basal_Scalebar50.png', bbox_inches='tight', format='png', dpi = 700)
plt.tight_layout()
plt.show()
Now adding transcripts to the same area
# Clusters whose cells are drawn as background outlines.
clusters_to_include = ['0', '3', '4', '12', '11']  # Example cluster list
# Genes to overlay, with their marker colours.
genes_of_interest = {
    'TraesCS4A02G256700': '#C51D71', #knox
    'TraesCS1A02G418200': 'darkturquoise', #nl1
    'TraesCS7D02G246100': '#F57BEC'#cuc
}
# Pre-filter both tables so the mapping step below stays fast.
filtered_merged_df = merged_df[merged_df['clusters'].astype(str).isin(clusters_to_include)]
entity_id_list = list(filtered_merged_df['EntityID'])
# NOTE: the rotation centre is still computed from the full merged_df so this
# view maintains the same segmentation_center_x / segmentation_center_y.
# Transcripts belonging to a retained cell AND one of the genes of interest.
filtered_transcripts_df2 = filtered_transcripts_df[
    filtered_transcripts_df['EntityID'].isin(entity_id_list)
    & filtered_transcripts_df['gene'].isin(genes_of_interest.keys())
].copy()
# Rotation parameters: same -22 degrees and same centre as the domain map,
# so the two figures overlay exactly.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Rotate every retained cell outline; all cells share one neutral style here
# (the transcripts carry the colour).
rotated_patches_list = []
for _, row in filtered_merged_df.iterrows():
    shape = row['Geometry']
    # FIX: dropped the unused `cluster = str(row['clusters'])` — styling is
    # uniform in this plot, so the cluster label was never consulted.
    face_color = '#F3F3F3'  # Default fill color for included clusters
    edge_color = 'dimgrey'  # Default edge color
    if isinstance(shape, MultiPolygon):
        geoms = shape.geoms
    elif isinstance(shape, Polygon):
        geoms = [shape]
    else:
        geoms = []
    for poly in geoms:
        centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)

# Rotate the transcript coordinates with the same transform (no containment
# filtering here — that was done by the spatial join earlier).
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = filtered_transcripts_df2[filtered_transcripts_df2['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }
# Plot the rotated segmentation outlines with the rotated transcripts on top.
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# One scatter layer per gene, coloured per genes_of_interest.
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=25, alpha=1, label=gene)
# Same window as the basal domain map so the two figures align.
ax.set_ylim(6950, 7210)
ax.set_xlim(3950, 4110)
ax.set_aspect('equal')
# 50-micron scale bar (length below is 50, despite the original "25" comment).
scale_bar_length = 50  # Microns
x_start = 3955  # Starting x-coordinate of the scale bar
y_start = 6955  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar
# Legend placed outside the axes.
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
# Hide all ticks and labels for a clean anatomical map.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_transcriptmap_basal_Scalebar50.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
domain maps and transcript maps for upper spikelet ridges¶
import numpy as np
import math
from shapely.geometry import Polygon, MultiPolygon
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
import matplotlib.patches as patches
# Fill colours for highlighted clusters in the central (upper spikelet-ridge)
# view; clusters '1' and '2' are drawn white (filled and outlined) here,
# unlike the basal map. Any other cluster is unfilled with no outline.
custom_cluster_colors = {
    '0': '#a70e62',
    '12': '#6ecad5',
    '3': '#f0b932',
    '11': '#fed2ff',
    '4': '#ffd95a',
    '1': 'white',
    '2': 'white'
}
default_color = 'none'

# Same -22 degree rotation about the all-cell centroid mean as the basal map.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Build one matplotlib Polygon patch per rotated cell outline.
rotated_patches_list = []
for _, row in merged_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # keys in custom_cluster_colors are strings
    if cluster in custom_cluster_colors:
        color = custom_cluster_colors[cluster]
        edge_color = '#595959'  # outline for highlighted clusters
    else:
        color = default_color
        edge_color = 'none'
    # Single shared code path for Polygon / MultiPolygon (the original
    # duplicated this block verbatim).
    if isinstance(shape, MultiPolygon):
        geoms = shape.geoms
    elif isinstance(shape, Polygon):
        geoms = [shape]
    else:
        geoms = []
    for poly in geoms:
        centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        # FIX: dropped the unused `rotated_poly = Polygon(rotated_coords)`.
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)
# Draw the rotated, cluster-coloured cell outlines for the central view.
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# FIX: removed the unused min/max extent computation — the view window below
# is hard-coded to the central region of interest.
ax.set_ylim(7315, 7570)
ax.set_xlim(3960, 4088)
ax.set_aspect('equal')
# 50-micron scale bar (the original comment said "25-micron"; the length used
# here is 50, matching the output filename).
scale_bar_length = 50  # Microns
x_start = 3965  # Starting x-coordinate of the scale bar
y_start = 7320  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
# Hide all ticks and labels for a clean anatomical map.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_domainmap_central_Scalebar50.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Clusters drawn as background outlines for the central view ('1' and '2'
# are included here, unlike the basal transcript map).
clusters_to_include = ['0', '3', '4', '12', '11', '1', '2']
# Genes to overlay, with their marker colours.
genes_of_interest = {
    'TraesCS4A02G256700': '#C51D71', #knox
    'TraesCS1A02G418200': 'darkturquoise', #nl1
    'TraesCS7D02G246100': '#F57BEC'#cuc
}
# Pre-filter both tables so the mapping step below stays fast.
filtered_merged_df = merged_df[merged_df['clusters'].astype(str).isin(clusters_to_include)]
entity_id_list = list(filtered_merged_df['EntityID'])
# NOTE: the rotation centre is still computed from the full merged_df so this
# view maintains the same segmentation_center_x / segmentation_center_y.
# Transcripts belonging to a retained cell AND one of the genes of interest.
filtered_transcripts_df2 = filtered_transcripts_df[
    filtered_transcripts_df['EntityID'].isin(entity_id_list)
    & filtered_transcripts_df['gene'].isin(genes_of_interest.keys())
].copy()
# Rotation parameters (-22 degrees about the all-cell centroid mean).
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()
rotated_patches_list = []
for _, row in filtered_merged_df.iterrows():
shape = row['Geometry']
cluster = str(row['clusters']) # Ensure clusters are strings to match the keys in the color dictionary
# Set face and edge colors for included clusters
face_color = '#F3F3F3' # Default fill color for included clusters
edge_color = 'dimgrey' # Default edge color
if isinstance(shape, MultiPolygon):
for poly in shape.geoms:
centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
rotated_patches_list.append(patch)
elif isinstance(shape, Polygon):
centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
rotated_patches_list.append(patch)
# Step 2: Rotate transcript coordinates without filtering by polygon containment
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
transcripts_of_interest = filtered_transcripts_df2[filtered_transcripts_df2['gene'] == gene].copy()
transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
# Rotate coordinates
centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
# Store rotated coordinates and color for plotting
rotated_transcript_data[gene] = {
'x': rotated_transcript_coords[:, 0],
'y': rotated_transcript_coords[:, 1],
'color': color
}
# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's rotated transcripts with unique colors
for gene, data in rotated_transcript_data.items():
ax.scatter(data['x'], data['y'], color=data['color'], s=25, alpha=1, label=gene)
# Set axis limits
ax.set_ylim(7315, 7570)
ax.set_xlim(3960, 4088)
ax.set_aspect('equal')
# Add a 25-micron scale bar
scale_bar_length = 50 # Microns
x_start = 3965 # Starting x-coordinate of the scale bar
y_start = 7320 # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length # Ending x-coordinate of the scale bar
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_transcriptnmap_central_Scalebar50.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
Mapping out the transcripts for selected genes for figure panels 5G and 5L
# ---- Transcript map for qSH1 / AIL6 over the whole double ridge ----
# Clusters whose cells form the (uncoloured) segmentation backdrop.
clusters_to_include = ['0', '3', '4', '12', '11', '1', '2', '9', '13', '6']  # Example cluster list
# Gene ID -> scatter colour for the transcript overlay.
genes_of_interest = {
    'TraesCS3D02G357400': '#7CD250',  # qSH1
    'TraesCS4A02G123800': '#34618D'
}

# Filter transcripts to only those inside the cells of interest.
# Step 1: keep only rows of `merged_df` belonging to the selected clusters.
filtered_merged_df = merged_df[merged_df['clusters'].astype(str).isin(clusters_to_include)]
# Cell IDs of the retained cells.
entity_id_list = filtered_merged_df['EntityID'].tolist()

# NOTE: the rotation centre below is taken from the ORIGINAL merged_df so it
# maintains the same segmentation_center_x / segmentation_center_y as the
# other panels.
# Step 2: keep only transcripts inside the retained cells AND in the gene list.
filtered_transcripts_df2 = filtered_transcripts_df.copy()
filtered_transcripts_df2 = filtered_transcripts_df2[filtered_transcripts_df2['EntityID'].isin(entity_id_list)]
filtered_transcripts_df2 = filtered_transcripts_df2[filtered_transcripts_df2['gene'].isin(genes_of_interest.keys())]

# Rotation parameters: -22 degrees about the mean segmentation centroid.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Centre of rotation: mean centroid of ALL cells (not just the filtered ones).
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Rotate every retained cell outline into a matplotlib patch.
rotated_patches_list = []
for _, row in filtered_merged_df.iterrows():
    shape = row['Geometry']
    # Uniform styling: near-white fill with grey outline for all included clusters.
    # (The unused per-row `cluster` lookup was removed.)
    face_color = '#FDFDFD'  # Default fill color for included clusters
    edge_color = 'dimgrey'  # Default edge color
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)

# Step 3: rotate the transcript coordinates (no polygon-containment filtering).
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = filtered_transcripts_df2[filtered_transcripts_df2['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    # Store rotated coordinates and colour for plotting.
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }

# ---- Plot the rotated segmentation plus the transcript scatter ----
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# One scatter layer per gene, with its assigned colour.
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=4, alpha=1, label=gene)

# Fixed view window over the whole double ridge (microns).
ax.set_ylim(6650, 7825)
ax.set_xlim(3750, 4150)
ax.set_aspect('equal')

# 250-micron scale bar in the lower-left corner.
scale_bar_length = 250  # Microns
x_start = 3755  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar
# Legend outside the axes so it does not cover the map.
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide axis ticks/labels so only the map, scale bar and legend remain.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_RIL1_AIL6_Scalebar250.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
# ---- Transcript map for SPL17 / YABBY1 over the whole double ridge ----
# Clusters whose cells form the (uncoloured) segmentation backdrop.
clusters_to_include = ['0', '3', '4', '12', '11', '1', '2', '9', '13', '6']  # Example cluster list
# Gene ID -> scatter colour for the transcript overlay.
genes_of_interest = {
    'TraesCS5A02G265900': '#7CD250',  # SPL17
    'TraesCS1D02G162600': '#34618D'  # YABBY1
}

# Filter transcripts to only those inside the cells of interest.
# Step 1: keep only rows of `merged_df` belonging to the selected clusters.
filtered_merged_df = merged_df[merged_df['clusters'].astype(str).isin(clusters_to_include)]
# Cell IDs of the retained cells.
entity_id_list = filtered_merged_df['EntityID'].tolist()

# NOTE: the rotation centre below is taken from the ORIGINAL merged_df so it
# maintains the same segmentation_center_x / segmentation_center_y as the
# other panels.
# Step 2: keep only transcripts inside the retained cells AND in the gene list.
filtered_transcripts_df2 = filtered_transcripts_df.copy()
filtered_transcripts_df2 = filtered_transcripts_df2[filtered_transcripts_df2['EntityID'].isin(entity_id_list)]
filtered_transcripts_df2 = filtered_transcripts_df2[filtered_transcripts_df2['gene'].isin(genes_of_interest.keys())]

# Rotation parameters: -22 degrees about the mean segmentation centroid.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Centre of rotation: mean centroid of ALL cells (not just the filtered ones).
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Rotate every retained cell outline into a matplotlib patch.
rotated_patches_list = []
for _, row in filtered_merged_df.iterrows():
    shape = row['Geometry']
    # Uniform styling: near-white fill with grey outline for all included clusters.
    # (The unused per-row `cluster` lookup was removed.)
    face_color = '#FDFDFD'  # Default fill color for included clusters
    edge_color = 'dimgrey'  # Default edge color
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)

# Step 3: rotate the transcript coordinates (no polygon-containment filtering).
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = filtered_transcripts_df2[filtered_transcripts_df2['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    # Store rotated coordinates and colour for plotting.
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }

# ---- Plot the rotated segmentation plus the transcript scatter ----
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# One scatter layer per gene, with its assigned colour.
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=6, alpha=1, label=gene)

# Fixed view window over the whole double ridge (microns).
ax.set_ylim(6650, 7825)
ax.set_xlim(3750, 4150)
ax.set_aspect('equal')

# 250-micron scale bar in the lower-left corner.
scale_bar_length = 250  # Microns
x_start = 3755  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar
# Legend outside the axes so it does not cover the map.
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide axis ticks/labels so only the map, scale bar and legend remain.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_SPL17_YABBY1_Scalebar250.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
Now mapping by cell groups
# ---- Map the leaf-ridge (LR) cell groups, coloured by cluster ----
# Cells whose custom group label starts with 'LR'; all other cells are drawn unfilled.
filtered_cellgroups = cellgroups[cellgroups['Custom cell groups'].str.startswith('LR')]
filtered_entities = filtered_cellgroups.index  # Assuming the index contains 'EntityID'

# Cluster -> fill colour for cells that belong to an LR group.
custom_cluster_colors = {
    '0': '#a70e62',
    '12': '#6ecad5',
    '3': '#f0b932',
    '11': '#fed2ff',
    '4': '#ffd95a'
}
default_color = "white"  # Fill for LR cells whose cluster is not listed above

# Rotate by -22 degrees about the mean centroid of ALL cells so this panel
# stays aligned with the other figures.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Build one patch per polygon, filled only when the cell belongs to an LR group.
# (The unused intermediate shapely `rotated_poly` objects were removed.)
rotated_patches_list = []
for _, row in merged_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # String keys to match custom_cluster_colors
    entity_id = row['EntityID']  # EntityID of the current row
    edge_color = '#595959'  # Same outline colour for every cell
    if entity_id in filtered_entities:
        face_color = custom_cluster_colors.get(cluster, default_color)  # Domain colours
    else:
        face_color = 'none'  # Transparent fill for non-cellgroup entities
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)

# ---- Plot the rotated, group-coloured polygons ----
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)

# Fixed view window over the whole double ridge (microns).
# (The previous bounding-box computation was removed: its results were never
# used because these limits are hard-coded.)
ax.set_ylim(6650, 7825)
ax.set_xlim(3750, 4150)
ax.set_aspect('equal')

# 250-micron scale bar in the lower-left corner.
scale_bar_length = 250  # Microns
x_start = 3755  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks/labels so only the map and scale bar remain.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_leafridgegroups_Scalebar250.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# ---- Map the spikelet-ridge (SR) cell groups, coloured by cluster ----
# Cells whose custom group label starts with 'SR'; all other cells are drawn unfilled.
filtered_cellgroups = cellgroups[cellgroups['Custom cell groups'].str.startswith('SR')]
filtered_entities = filtered_cellgroups.index  # Assuming the index contains 'EntityID'

# Cluster -> fill colour for cells that belong to an SR group.
custom_cluster_colors = {
    '0': '#a70e62',
    '12': '#6ecad5',
    '3': '#f0b932',
    '11': '#fed2ff',
    '4': '#ffd95a',
    '1' : '#e10e3c',
    '2' : '#f47140'
}
default_color = "white"  # Fill for SR cells whose cluster is not listed above

# Rotate by -22 degrees about the mean centroid of ALL cells so this panel
# stays aligned with the other figures.
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])
segmentation_center_x = merged_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_df['Geometry'].centroid.y.mean()

# Build one patch per polygon, filled only when the cell belongs to an SR group.
# (The unused intermediate shapely `rotated_poly` objects were removed.)
rotated_patches_list = []
for _, row in merged_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # String keys to match custom_cluster_colors
    entity_id = row['EntityID']  # EntityID of the current row
    edge_color = '#595959'  # Same outline colour for every cell
    if entity_id in filtered_entities:
        face_color = custom_cluster_colors.get(cluster, default_color)  # Domain colours
    else:
        face_color = 'none'  # Transparent fill for non-cellgroup entities
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            centered_coords = np.array(poly.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        centered_coords = np.array(shape.exterior.coords) - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=face_color, edgecolor=edge_color, linewidth=0.5)
        rotated_patches_list.append(patch)

# ---- Plot the rotated, group-coloured polygons ----
fig, ax = plt.subplots(figsize=(10, 10))
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)

# Fixed view window over the whole double ridge (microns).
# (The previous bounding-box computation was removed: its results were never
# used because these limits are hard-coded.)
ax.set_ylim(6650, 7825)
ax.set_xlim(3750, 4150)
ax.set_aspect('equal')

# 250-micron scale bar in the lower-left corner.
scale_bar_length = 250  # Microns
x_start = 3755  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks/labels so only the map and scale bar remain.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
# BUGFIX: tight_layout must run BEFORE savefig so the saved image is laid out
# the same way as every other panel (it previously ran after saving, where it
# only affected the on-screen figure).
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikeletridgegroups_Scalebar250.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
Creating composite maps
I will perform the same steps on four spikelet ridges (8–11): I will rotate them to the same orientation and sum transcript counts within a 25x25 grid.
SR8
# 1. Select SR8 cells only
sr8_group = cellgroups[cellgroups['Custom cell groups'] == 'SR8']
sr8_list = sr8_group.index.astype(str).tolist()

# 2. Restrict the segmentation table to the SR8 cells
segmentation_df['EntityID'] = segmentation_df['EntityID'].astype(str)  # Ensure EntityID is string
filtered_segmentation_df = segmentation_df[segmentation_df['EntityID'].isin(sr8_list)]

# 3. Build grey preview patches and gather every outline vertex so the
#    bounding box of the group can be computed.
#    NOTE(review): filtered_patches_list is not plotted in this section (the
#    rotated patches below are) — presumably kept for parity with the other
#    ridge sections; confirm before removing.
filtered_patches_list = []
all_coords = []  # Outline vertices of every SR8 polygon, for the bounding box
for _, row in filtered_segmentation_df.iterrows():
    geometry = row['Geometry']
    if isinstance(geometry, MultiPolygon):
        parts = list(geometry.geoms)
    elif isinstance(geometry, Polygon):
        parts = [geometry]
    else:
        parts = []  # Any other geometry type is skipped, as in the original branches
    for part in parts:
        outline = list(part.exterior.coords)
        all_coords.extend(outline)
        filtered_patches_list.append(
            patches.Polygon(outline, closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
        )

# Bounding box of the SR8 group
all_coords = np.array(all_coords)
x_min, x_max = all_coords[:, 0].min(), all_coords[:, 0].max()
y_min, y_max = all_coords[:, 1].min(), all_coords[:, 1].max()

# 4. Keep only transcripts that fall inside that bounding box
inside_x = (transcripts_df['global_x'] >= x_min) & (transcripts_df['global_x'] <= x_max)
inside_y = (transcripts_df['global_y'] >= y_min) & (transcripts_df['global_y'] <= y_max)
transcripts_in_area = transcripts_df[inside_x & inside_y]
# Genes to overlay on the SR8 map, with their scatter colours
genes_of_interest = {
    'TraesCS4A02G256700': '#C51D71',  # knox
    'TraesCS7D02G246100': '#F57BEC',  # cuc3
    'TraesCS4A02G123800': '#3197F9',  # AIL6
    'TraesCS4A02G016000': '#FF8700'  # ILI3
}

# Rotation of 159 degrees so this ridge matches the shared orientation used
# for the composite maps
angle = 159  # Degrees
radians = math.radians(angle)
rotation_matrix = np.array([
    [math.cos(radians), -math.sin(radians)],
    [math.sin(radians), math.cos(radians)],
])

# Rotation centre: mean centroid of the SR8 cells themselves
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Rotate and mirror segmentation polygons for visualization and filtering
rotated_patches_list = []  # matplotlib patches of the transformed outlines
rotated_polygons = []  # shapely Polygons, used below for point-in-polygon tests
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            coords = poly.exterior.coords
            # Translate to the rotation centre, rotate, translate back.
            centered_coords = coords - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            # Mirror by flipping the sign of y after rotation.
            mirrored_coords = rotated_coords * np.array([1, -1])
            rotated_poly = Polygon(mirrored_coords)
            rotated_polygons.append(rotated_poly)
            patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        coords = shape.exterior.coords
        centered_coords = coords - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        mirrored_coords = rotated_coords * np.array([1, -1])
        rotated_poly = Polygon(mirrored_coords)
        rotated_polygons.append(rotated_poly)
        # NOTE(review): linewidth is 0.25 here but 0.5 in the MultiPolygon
        # branch above — looks unintentional; confirm which value is wanted.
        patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.25)
        rotated_patches_list.append(patch)
# Apply the same rotate-then-mirror transform to the transcript spots, keeping
# only spots that land inside one of the transformed cell outlines.
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_coords = coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    mirrored_coords = rotated_coords * np.array([1, -1])
    # Point-in-polygon filter against the transformed cell outlines
    filtered_x, filtered_y = [], []
    for x, y in mirrored_coords:
        if any(cell.contains(Point(x, y)) for cell in rotated_polygons):
            filtered_x.append(x)
            filtered_y.append(y)
    rotated_transcript_data[gene] = {'x': filtered_x, 'y': filtered_y, 'color': color}

# Grid extents: bounding box of the transformed outlines, split into 25x25 bins
mirrored_x_coords = [pt[0] for patch in rotated_patches_list for pt in patch.get_xy()]
mirrored_y_coords = [pt[1] for patch in rotated_patches_list for pt in patch.get_xy()]
x_min, x_max = min(mirrored_x_coords), max(mirrored_x_coords)
y_min, y_max = min(mirrored_y_coords), max(mirrored_y_coords)
num_bins = 25
x_bins = np.linspace(x_min, x_max, num_bins + 1)
y_bins = np.linspace(y_min, y_max, num_bins + 1)

# Per-gene transcript counts per grid cell (rows = y bins, columns = x bins).
# NOTE(review): a spot lying exactly on the upper x/y edge digitizes to index
# num_bins and is dropped by the bounds check — confirm this is acceptable.
frequency_data = {gene: np.zeros((num_bins, num_bins)) for gene in genes_of_interest.keys()}
for gene, data in rotated_transcript_data.items():
    for x, y in zip(data['x'], data['y']):
        col_idx = np.digitize(x, x_bins) - 1
        row_idx = np.digitize(y, y_bins) - 1
        if 0 <= col_idx < num_bins and 0 <= row_idx < num_bins:
            frequency_data[gene][row_idx, col_idx] += 1
# ---- Plot the transformed SR8 outlines, transcript spots and binning grid ----
fig, ax = plt.subplots(figsize=(10, 10))

# Transformed cell outlines
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)

# One scatter layer per gene
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=20, alpha=1, label=gene)

# Overlay the 25x25 binning grid
for edge in x_bins:
    ax.axvline(x=edge, color='gray', linewidth=0.5, alpha=0.85)
for edge in y_bins:
    ax.axhline(y=edge, color='gray', linewidth=0.5, alpha=0.85)

# Axis extents match the grid exactly
ax.set_xlim([x_min, x_max])
ax.set_ylim([y_min, y_max])
ax.set_aspect('equal')

# Label every bin at its centre
ax.set_xticks(x_bins[:-1] + np.diff(x_bins) / 2)
ax.set_xticklabels([f"X_Bin{i+1}" for i in range(num_bins)], rotation=90)
ax.set_yticks(y_bins[:-1] + np.diff(y_bins) / 2)
ax.set_yticklabels([f"Y_Bin{i+1}" for i in range(num_bins)])

ax.set_title("Spikelet 8 with Transcripts")
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet8_bin25.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# ---- Per-gene heatmaps of the binned transcript counts ----
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 20), constrained_layout=True)
for ax, (gene, freq_matrix) in zip(axes, frequency_data.items()):
    im = ax.imshow(freq_matrix, cmap='viridis', origin='lower')
    ax.set_title(f"Spikelet 8 Transcript Frequency for {gene}", fontsize=12)
    ax.set_xlabel("X Bin Index")
    ax.set_ylabel("Y Bin Index")
    cbar = fig.colorbar(im, ax=ax, orientation='vertical', pad=0.02)
    cbar.set_label("Frequency")
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet8_bin25_heatmaps.png', dpi=700, bbox_inches='tight', format='png')
plt.show()

# Keep the SR8 grid counts so they can be concatenated with the other ridges later
frequency_data_spikelet8 = frequency_data.copy()
SR10¶
# 1. Select SR10 cells, excluding a few outlier cells that make alignment impossible
sr10_group = cellgroups[cellgroups['Custom cell groups'] == 'SR10']
remove_list = ['2343479300072100056', '2343479300072100054', '2343479300059100115'] # cell IDs to drop
sr10_list = [cell_id for cell_id in sr10_group.index.astype(str) if cell_id not in remove_list]
# 2. Restrict the segmentation table to the selected SR10 cells
segmentation_df['EntityID'] = segmentation_df['EntityID'].astype(str) # isin comparison needs string dtype
filtered_segmentation_df = segmentation_df[segmentation_df['EntityID'].isin(sr10_list)]
# 3. Create patches for visualization and calculate the bounding box
filtered_patches_list = []
all_coords = [] # Store all coordinates for bounding box calculation
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        # A MultiPolygon cell contributes one patch per component polygon
        for poly in shape.geoms:
            coords = list(poly.exterior.coords)
            all_coords.extend(coords)
            patch = patches.Polygon(coords, closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
            filtered_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        coords = list(shape.exterior.coords)
        all_coords.extend(coords)
        patch = patches.Polygon(coords, closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
        filtered_patches_list.append(patch)
# Calculate bounding box from all coordinates
all_coords = np.array(all_coords)
x_min, x_max = all_coords[:, 0].min(), all_coords[:, 0].max()
y_min, y_max = all_coords[:, 1].min(), all_coords[:, 1].max()
# 4. Filter transcript data within the bounding box (coarse pre-filter;
# exact per-polygon containment is applied after rotation below)
transcripts_in_area = transcripts_df[
    (transcripts_df['global_x'] >= x_min) & (transcripts_df['global_x'] <= x_max) &
    (transcripts_df['global_y'] >= y_min) & (transcripts_df['global_y'] <= y_max)
]
# Define genes and their colors (wheat gene IDs -> hex plot colors)
genes_of_interest = {
    'TraesCS4A02G256700': '#C51D71', #knox
    'TraesCS7D02G246100': '#F57BEC', #cuc3
    'TraesCS4A02G123800': '#3197F9', #AIL6
    'TraesCS4A02G016000': '#FF8700'#ILI3
}
# Define rotation parameters (angle hand-tuned to align this spikelet)
angle = 159 # Degrees
radians = math.radians(angle)
rotation_matrix = np.array([[math.cos(radians), -math.sin(radians)], [math.sin(radians), math.cos(radians)]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Rotate and mirror segmentation polygons for visualization and filtering.
# NOTE(review): np.dot(coords, R) multiplies row vectors, i.e. it applies the
# transpose of R; combined with the y-mirror this yields the orientation the
# author tuned `angle` for — confirm before reusing this transform elsewhere.
rotated_patches_list = []
rotated_polygons = []
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            coords = poly.exterior.coords
            # Rotate about the group centroid, then mirror the y-axis
            centered_coords = coords - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            mirrored_coords = rotated_coords * np.array([1, -1])
            rotated_poly = Polygon(mirrored_coords)
            rotated_polygons.append(rotated_poly)
            patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        coords = shape.exterior.coords
        centered_coords = coords - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        mirrored_coords = rotated_coords * np.array([1, -1])
        rotated_poly = Polygon(mirrored_coords)
        rotated_polygons.append(rotated_poly)
        # NOTE: single-polygon cells use linewidth=0.25 vs 0.5 for multipolygon parts
        patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.25)
        rotated_patches_list.append(patch)
# Apply the same rotation + y-mirror to the transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_coords = coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    mirrored_coords = rotated_coords * np.array([1, -1])
    # Keep only transcripts that land inside a rotated cell polygon
    filtered_x, filtered_y = [], []
    for x, y in mirrored_coords:
        point = Point(x, y)
        if any(poly.contains(point) for poly in rotated_polygons):
            filtered_x.append(x)
            filtered_y.append(y)
    rotated_transcript_data[gene] = {'x': filtered_x, 'y': filtered_y, 'color': color}
# Grid extents from every vertex of the rotated/mirrored patches
patch_vertices = np.vstack([patch.get_xy() for patch in rotated_patches_list])
x_min, x_max = patch_vertices[:, 0].min(), patch_vertices[:, 0].max()
y_min, y_max = patch_vertices[:, 1].min(), patch_vertices[:, 1].max()
num_bins = 25
x_bins = np.linspace(x_min, x_max, num_bins + 1)
y_bins = np.linspace(y_min, y_max, num_bins + 1)
# Per-gene transcript counts per grid cell (row = y bin, col = x bin).
# np.digitize is 1-based, so subtract 1; points on the outer right/top edge
# fall out of range and are dropped, matching the scalar-loop behaviour.
frequency_data = {gene: np.zeros((num_bins, num_bins)) for gene in genes_of_interest}
for gene, data in rotated_transcript_data.items():
    x_idx = np.digitize(data['x'], x_bins) - 1
    y_idx = np.digitize(data['y'], y_bins) - 1
    in_grid = (x_idx >= 0) & (x_idx < num_bins) & (y_idx >= 0) & (y_idx < num_bins)
    # ufunc.at accumulates correctly when several points share one cell
    np.add.at(frequency_data[gene], (y_idx[in_grid], x_idx[in_grid]), 1)
# Plot rotated polygons and transcript points for spikelet 10
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot transcript points, one scatter (and legend entry) per gene
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=20, alpha=1, label=gene)
# Add grid lines at every bin edge
for x_bin in x_bins:
    ax.axvline(x=x_bin, color='gray', linewidth=0.5, alpha=0.85)
for y_bin in y_bins:
    ax.axhline(y=y_bin, color='gray', linewidth=0.5, alpha=0.85)
# Set axis properties
ax.set_xlim([x_min, x_max])
ax.set_ylim([y_min, y_max])
ax.set_aspect('equal')
# Tick at each bin centre, labelled X_Bin1..X_Bin25 / Y_Bin1..Y_Bin25
ax.set_xticks(x_bins[:-1] + np.diff(x_bins) / 2)
ax.set_xticklabels([f"X_Bin{i+1}" for i in range(num_bins)], rotation=90)
ax.set_yticks(y_bins[:-1] + np.diff(y_bins) / 2)
ax.set_yticklabels([f"Y_Bin{i+1}" for i in range(num_bins)])
# Add title and legend
ax.set_title("Spikelet 10 with Transcripts")
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
plt.tight_layout()
# NOTE: this figure is saved with transparent=True, unlike the spikelet-8 figure
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet10_bin25.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
# Plot heatmaps for each gene (nrows=4 assumes 4 genes of interest)
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 20), constrained_layout=True)
for ax, (gene, freq_matrix) in zip(axes, frequency_data.items()):
    im = ax.imshow(freq_matrix, cmap='viridis', origin='lower')
    ax.set_title(f"Spikelet 10 Transcript Frequency for {gene}", fontsize=12)
    ax.set_xlabel("X Bin Index")
    ax.set_ylabel("Y Bin Index")
    fig.colorbar(im, ax=ax, orientation='vertical', pad=0.02).set_label("Frequency")
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet10_bin25_heatmaps.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Keep this spikelet's grid for the composite map built later
frequency_data_spikelet10 = frequency_data.copy()
SR9¶
Note: SR9 and SR11 need to be y-mirrored to align the same way as SR8 and SR10
# 1. Select SR9 cells only (no outlier removal needed for this group)
sr9_group = cellgroups[cellgroups['Custom cell groups'] == 'SR9']
sr9_list = [cell_id for cell_id in sr9_group.index.astype(str)]
# 2. Restrict the segmentation table to the SR9 cells
segmentation_df['EntityID'] = segmentation_df['EntityID'].astype(str) # isin comparison needs string dtype
filtered_segmentation_df = segmentation_df[segmentation_df['EntityID'].isin(sr9_list)]
# Define genes and their colors (same palette as the SR8/SR10 sections)
genes_of_interest = {
    'TraesCS4A02G256700': '#C51D71', #knox
    'TraesCS7D02G246100': '#F57BEC', #cuc3
    'TraesCS4A02G123800': '#3197F9', #AIL6
    'TraesCS4A02G016000': '#FF8700'#ILI3
}
# 3. Create patches for visualization and calculate the bounding box
filtered_patches_list = []
all_coords = [] # Store all coordinates for bounding box calculation
# Add each filtered cell's polygon to the plot
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
            filtered_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
        filtered_patches_list.append(patch)
# Calculate bounding box for the filtered segmentation cells
filtered_x_coords = [coord[0] for patch in filtered_patches_list for coord in patch.get_xy()]
filtered_y_coords = [coord[1] for patch in filtered_patches_list for coord in patch.get_xy()]
x_min, x_max = min(filtered_x_coords), max(filtered_x_coords)
y_min, y_max = min(filtered_y_coords), max(filtered_y_coords)
# Coarse pre-filter: keep transcripts inside the (unrotated) bounding box;
# exact per-polygon containment is applied after rotation below
transcripts_in_area = transcripts_df[
    (transcripts_df['global_x'] >= x_min) & (transcripts_df['global_x'] <= x_max) &
    (transcripts_df['global_y'] >= y_min) & (transcripts_df['global_y'] <= y_max)
]
# Define rotation parameters (angle hand-tuned to align this spikelet)
angle = 159 # Degrees
radians = np.radians(angle)
rotation_matrix = np.array([[np.cos(radians), -np.sin(radians)], [np.sin(radians), np.cos(radians)]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation and y-mirroring to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            # Rotate about the group centroid, then mirror the y-axis
            centered_coords = poly.exterior.coords - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            mirrored_coords = rotated_coords * np.array([1, -1]) # Mirror only the y-axis
            rotated_poly = Polygon(mirrored_coords)
            rotated_polygons.append(rotated_poly)
            patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        centered_coords = shape.exterior.coords - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        mirrored_coords = rotated_coords * np.array([1, -1]) # Mirror only the y-axis
        rotated_poly = Polygon(mirrored_coords)
        rotated_polygons.append(rotated_poly)
        patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.5)
        rotated_patches_list.append(patch)
# Apply rotation and y-mirroring to transcript coordinates and filter by polygons
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    mirrored_transcript_coords = rotated_transcript_coords * np.array([1, -1]) # Mirror only the y-axis
    # Filter transcripts that are within any of the rotated polygons
    filtered_x, filtered_y = [], []
    for x, y in mirrored_transcript_coords:
        point = Point(x, y)
        if any(poly.contains(point) for poly in rotated_polygons): # Only add if within a polygon
            filtered_x.append(x)
            filtered_y.append(y)
    # Add filtered coordinates to the data dictionary for plotting
    rotated_transcript_data[gene] = {
        'x': filtered_x,
        'y': filtered_y,
        'color': color
    }
# Plot the rotated/mirrored SR9 cells with their in-cell transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated and y-mirrored segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=20, alpha=1, label=gene)
# Calculate the axis limits based on the mirrored coordinates
mirrored_x_coords = [coord[0] for patch in rotated_patches_list for coord in patch.get_xy()]
mirrored_y_coords = [coord[1] for patch in rotated_patches_list for coord in patch.get_xy()]
x_min, x_max = min(mirrored_x_coords), max(mirrored_x_coords)
y_min, y_max = min(mirrored_y_coords), max(mirrored_y_coords)
# Define 25 bins along each axis (reused by the heatmap section below)
num_bins = 25
x_bins = np.linspace(x_min, x_max, num_bins + 1)
y_bins = np.linspace(y_min, y_max, num_bins + 1)
# Draw grid lines on the plot
for x_bin in x_bins:
    ax.axvline(x=x_bin, color='gray', linewidth=0.5, alpha=0.85, zorder=1)
for y_bin in y_bins:
    # NOTE: horizontal lines use alpha=0.8 vs 0.85 for vertical ones
    ax.axhline(y=y_bin, color='gray', linewidth=0.5, alpha=0.8, zorder=1)
# Set axis limits to fit the grid perfectly, but reverse x-axis for a horizontal flip
ax.set_xlim([x_max, x_min]) # Flip x-axis by reversing min and max
ax.set_ylim([y_min, y_max])
ax.set_aspect('equal')
# Tick at each bin centre, labelled X_Bin1..X_Bin25 / Y_Bin1..Y_Bin25
ax.set_xticks(x_bins[:-1] + np.diff(x_bins) / 2)
ax.set_xticklabels([f"X_Bin{i+1}" for i in range(num_bins)], rotation=90)
ax.set_yticks(y_bins[:-1] + np.diff(y_bins) / 2)
ax.set_yticklabels([f"Y_Bin{i+1}" for i in range(num_bins)])
# Add title
ax.set_title(f"Spikelet 9 Segmentation and Transcripts (Angle: {angle}°) with 25x25 Grid")
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet9_bin25.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Plot heatmaps for each gene.
# BUG FIX: the original read `frequency_data` without recomputing it for SR9,
# so the loop — and the saved `frequency_data_spikelet9` — reused the stale
# spikelet-10 counts. Rebuild the per-gene 25x25 counts from this section's
# rotated, polygon-filtered transcripts first.
frequency_data = {gene: np.zeros((num_bins, num_bins)) for gene in genes_of_interest.keys()}
for gene, data in rotated_transcript_data.items():
    for x, y in zip(data['x'], data['y']):
        x_idx = np.digitize(x, x_bins) - 1  # digitize is 1-based; shift to 0-based
        y_idx = np.digitize(y, y_bins) - 1
        # Points exactly on the outer right/top edge digitize out of range; skip them
        if 0 <= x_idx < num_bins and 0 <= y_idx < num_bins:
            frequency_data[gene][y_idx, x_idx] += 1
# One heatmap row per gene (nrows=4 assumes 4 genes of interest)
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 20), constrained_layout=True)
for ax, (gene, freq_matrix) in zip(axes, frequency_data.items()):
    im = ax.imshow(freq_matrix, cmap='viridis', origin='lower')
    ax.set_title(f"Spikelet 9 Transcript Frequency for {gene}", fontsize=12)
    ax.set_xlabel("X Bin Index")
    ax.set_ylabel("Y Bin Index")
    fig.colorbar(im, ax=ax, orientation='vertical', pad=0.02).set_label("Frequency")
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet9_bin25_heatmaps.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Keep this spikelet's grid for the composite map built later
frequency_data_spikelet9 = frequency_data.copy()
SR11¶
# 1. Select SR11 cells only, dropping one outlier cell that breaks alignment
sr11_group = cellgroups[cellgroups['Custom cell groups'] == 'SR11']
sr11_list = sr11_group.index.astype(str).tolist()
remove_list = ['2343479300071100346']
sr11_list = [item for item in sr11_list if item not in remove_list]
# 2. Filter the segmentation data for SR11 (original comment said SR9 — copy-paste typo)
segmentation_df['EntityID'] = segmentation_df['EntityID'].astype(str) # Ensure EntityID is string
filtered_segmentation_df = segmentation_df[segmentation_df['EntityID'].isin(sr11_list)]
# Define genes and their colors (same palette as the other sections)
genes_of_interest = {
    'TraesCS4A02G256700': '#C51D71', #knox
    'TraesCS7D02G246100': '#F57BEC', #cuc3
    'TraesCS4A02G123800': '#3197F9', #AIL6
    'TraesCS4A02G016000': '#FF8700'#ILI3
}
# 3. Create patches for visualization and calculate the bounding box
filtered_patches_list = []
all_coords = [] # Store all coordinates for bounding box calculation
# Add each filtered cell's polygon to the plot
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
            filtered_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor="gray", edgecolor='white', linewidth=0.25)
        filtered_patches_list.append(patch)
# Calculate bounding box for the filtered segmentation cells
filtered_x_coords = [coord[0] for patch in filtered_patches_list for coord in patch.get_xy()]
filtered_y_coords = [coord[1] for patch in filtered_patches_list for coord in patch.get_xy()]
x_min, x_max = min(filtered_x_coords), max(filtered_x_coords)
y_min, y_max = min(filtered_y_coords), max(filtered_y_coords)
# Coarse pre-filter: keep transcripts inside the (unrotated) bounding box;
# exact per-polygon containment is applied after rotation below
transcripts_in_area = transcripts_df[
    (transcripts_df['global_x'] >= x_min) & (transcripts_df['global_x'] <= x_max) &
    (transcripts_df['global_y'] >= y_min) & (transcripts_df['global_y'] <= y_max)
]
# Define rotation parameters (note: SR11 uses 155°, unlike the 159° elsewhere)
angle = 155 # Degrees
radians = np.radians(angle)
rotation_matrix = np.array([[np.cos(radians), -np.sin(radians)], [np.sin(radians), np.cos(radians)]])
# Calculate the center of the filtered segmentation data for rotation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
# Apply rotation and y-mirroring to each Polygon in the filtered segmentation data
rotated_patches_list = []
rotated_polygons = [] # Store rotated polygons for containment checks
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            # Rotate about the group centroid, then mirror the y-axis
            centered_coords = poly.exterior.coords - np.array([segmentation_center_x, segmentation_center_y])
            rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
            mirrored_coords = rotated_coords * np.array([1, -1]) # Mirror only the y-axis
            rotated_poly = Polygon(mirrored_coords)
            rotated_polygons.append(rotated_poly)
            patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.5)
            rotated_patches_list.append(patch)
    elif isinstance(shape, Polygon):
        centered_coords = shape.exterior.coords - np.array([segmentation_center_x, segmentation_center_y])
        rotated_coords = np.dot(centered_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
        mirrored_coords = rotated_coords * np.array([1, -1]) # Mirror only the y-axis
        rotated_poly = Polygon(mirrored_coords)
        rotated_polygons.append(rotated_poly)
        patch = patches.Polygon(mirrored_coords, closed=True, facecolor="#F3F3F3", edgecolor='dimgrey', linewidth=0.5)
        rotated_patches_list.append(patch)
# Apply rotation and y-mirroring to transcript coordinates and filter by polygons
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    centered_transcript_coords = transcript_coords - np.array([segmentation_center_x, segmentation_center_y])
    rotated_transcript_coords = np.dot(centered_transcript_coords, rotation_matrix) + np.array([segmentation_center_x, segmentation_center_y])
    mirrored_transcript_coords = rotated_transcript_coords * np.array([1, -1]) # Mirror only the y-axis
    # Filter transcripts that are within any of the rotated polygons
    filtered_x, filtered_y = [], []
    for x, y in mirrored_transcript_coords:
        point = Point(x, y)
        if any(poly.contains(point) for poly in rotated_polygons): # Only add if within a polygon
            filtered_x.append(x)
            filtered_y.append(y)
    # Add filtered coordinates to the data dictionary for plotting
    rotated_transcript_data[gene] = {
        'x': filtered_x,
        'y': filtered_y,
        'color': color
    }
# Plot the rotated/mirrored SR11 cells with their in-cell transcripts
fig, ax = plt.subplots(figsize=(10, 10))
# Add rotated and y-mirrored segmentation polygons
collection = PatchCollection(rotated_patches_list, match_original=True)
ax.add_collection(collection)
# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=20, alpha=1, label=gene)
# Calculate the axis limits based on the mirrored coordinates
mirrored_x_coords = [coord[0] for patch in rotated_patches_list for coord in patch.get_xy()]
mirrored_y_coords = [coord[1] for patch in rotated_patches_list for coord in patch.get_xy()]
x_min, x_max = min(mirrored_x_coords), max(mirrored_x_coords)
y_min, y_max = min(mirrored_y_coords), max(mirrored_y_coords)
# Define 25 bins along each axis (reused by the heatmap section below)
num_bins = 25
x_bins = np.linspace(x_min, x_max, num_bins + 1)
y_bins = np.linspace(y_min, y_max, num_bins + 1)
# Draw grid lines on the plot
for x_bin in x_bins:
    ax.axvline(x=x_bin, color='gray', linewidth=0.5, alpha=0.85, zorder=1)
for y_bin in y_bins:
    ax.axhline(y=y_bin, color='gray', linewidth=0.5, alpha=0.8, zorder=1)
# Set axis limits to fit the grid perfectly, but reverse x-axis for a horizontal flip
ax.set_xlim([x_max, x_min]) # Flip x-axis by reversing min and max
ax.set_ylim([y_min, y_max])
ax.set_aspect('equal')
# Tick at each bin centre, labelled X_Bin1..X_Bin25 / Y_Bin1..Y_Bin25
ax.set_xticks(x_bins[:-1] + np.diff(x_bins) / 2)
ax.set_xticklabels([f"X_Bin{i+1}" for i in range(num_bins)], rotation=90)
ax.set_yticks(y_bins[:-1] + np.diff(y_bins) / 2)
ax.set_yticklabels([f"Y_Bin{i+1}" for i in range(num_bins)])
# Add title
ax.set_title(f"Spikelet 11 Segmentation and Transcripts (Angle: {angle}°) with 25x25 Grid")
# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
plt.tight_layout()
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet11_bin25.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Plot heatmaps for each gene.
# BUG FIX: the original read `frequency_data` without recomputing it for SR11,
# so the loop — and the saved `frequency_data_spikelet11` — reused stale counts
# from an earlier section. Rebuild the per-gene 25x25 counts from this
# section's rotated, polygon-filtered transcripts first.
frequency_data = {gene: np.zeros((num_bins, num_bins)) for gene in genes_of_interest.keys()}
for gene, data in rotated_transcript_data.items():
    for x, y in zip(data['x'], data['y']):
        x_idx = np.digitize(x, x_bins) - 1  # digitize is 1-based; shift to 0-based
        y_idx = np.digitize(y, y_bins) - 1
        # Points exactly on the outer right/top edge digitize out of range; skip them
        if 0 <= x_idx < num_bins and 0 <= y_idx < num_bins:
            frequency_data[gene][y_idx, x_idx] += 1
# One heatmap row per gene (nrows=4 assumes 4 genes of interest)
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8, 20), constrained_layout=True)
for ax, (gene, freq_matrix) in zip(axes, frequency_data.items()):
    im = ax.imshow(freq_matrix, cmap='viridis', origin='lower')
    ax.set_title(f"Spikelet 11 Transcript Frequency for {gene}", fontsize=12)
    ax.set_xlabel("X Bin Index")
    ax.set_ylabel("Y Bin Index")
    fig.colorbar(im, ax=ax, orientation='vertical', pad=0.02).set_label("Frequency")
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_spikelet11_bin25_heatmaps.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
# Keep this spikelet's grid for the composite map built later
frequency_data_spikelet11 = frequency_data.copy()
combining all four grids to make a composite map¶
# Combine the four aligned spikelet grids into one composite map by averaging
# the per-bin counts (all four grids are 25x25 by construction).
combined_frequency_data = {}
for gene in genes_of_interest.keys():
    combined_frequency_data[gene] = (frequency_data_spikelet8[gene] + frequency_data_spikelet10[gene]
                                     + frequency_data_spikelet9[gene] + frequency_data_spikelet11[gene]) / 4
# Plot composite heatmap for each gene
for gene, freq_matrix in combined_frequency_data.items():
    plt.figure(figsize=(8, 6))
    plt.imshow(freq_matrix, cmap='viridis', origin='lower')
    # BUG FIX: the original chose the label with `"/ 2" in "combined_frequency_data"`,
    # a substring test on a string literal that is always False, so the averaged
    # data was mislabelled "Total Frequency". The data above is an average.
    plt.colorbar(label="Average Frequency")
    plt.title(f"Composite Heatmap of Transcript Frequency for {gene}")
    plt.xlabel("X Bin Index")
    plt.ylabel("Y Bin Index")
    plt.show()
# Combine the frequency data by averaging over the four aligned spikelets
combined_frequency_data = {}
for gene in genes_of_interest.keys():
    # Sum or average the frequencies from the two samples
    combined_frequency_data[gene] = (frequency_data_spikelet8[gene] + frequency_data_spikelet10[gene] +
                                     frequency_data_spikelet9[gene] + frequency_data_spikelet11[gene]) / 4 # Average
# Create a single figure with 4 subplots (1 column, 4 rows; one per gene)
fig, axs = plt.subplots(4, 1, figsize=(6, 20)) # Adjust `figsize` as needed for better visualization
for idx, (gene, freq_matrix) in enumerate(combined_frequency_data.items()):
    ax = axs[idx] # Get the corresponding subplot
    # Replace NaN values with a small constant
    freq_matrix = np.nan_to_num(freq_matrix, nan=0.1)
    # Apply a Gaussian filter to smooth out the data before zooming
    smoothed_matrix = gaussian_filter(freq_matrix, sigma=1) # Adjust sigma for desired smoothness
    # Apply smoothing by zooming in on the data.
    # NOTE(review): zoom_factor=1 makes this zoom a no-op (output equals input);
    # raise it above 1 to actually upsample the grid.
    zoom_factor = 1 # Adjust this to control the smoothing level
    smoothed_matrix = zoom(smoothed_matrix, zoom_factor)
    # Calculate the 20th percentile threshold
    threshold = np.percentile(smoothed_matrix, 20)
    # Apply threshold to set the bottom 20% to the threshold value (floors the background)
    smoothed_matrix = np.maximum(smoothed_matrix, threshold)
    # Create a dense grid in original-bin-index coordinates for the smoothed data
    X, Y = np.meshgrid(
        np.linspace(0, freq_matrix.shape[1] - 1, smoothed_matrix.shape[1]),
        np.linspace(0, freq_matrix.shape[0] - 1, smoothed_matrix.shape[0])
    )
    # Create a contour plot with smoothed and thresholded data
    contour = ax.contourf(X, Y, smoothed_matrix, cmap='viridis', levels=15) # Increase `levels` for more detail
    # Add colorbar to each subplot
    cbar = fig.colorbar(contour, ax=ax, orientation='vertical')
    cbar.set_label("Frequency")
    # Add title and labels to the subplot
    ax.set_title(f"Smoothed Contour Map for {gene}")
    ax.set_xlabel("X Bin Index")
    ax.set_ylabel("Y Bin Index")
# Adjust layout for better spacing between subplots
plt.tight_layout()
# Save, then show the final composite figure
plt.savefig('latedoubleridge_expressionanalysis/VGN1e1_compositemap_bin25_heatmaps.png', dpi=700, bbox_inches='tight', format='png')
plt.show()